From ef00cc720b0e10b54067c955eb9ee3393ace7114 Mon Sep 17 00:00:00 2001 From: phact Date: Tue, 12 Aug 2025 16:30:41 -0400 Subject: [PATCH] flow to support passing filters --- docker-compose.yml | 1 + flows/gendb_agent.json | 261 ++++++++++++++++++++++++----------- frontend/src/app/page.tsx | 7 +- src/services/chat_service.py | 45 ++++++ 4 files changed, 234 insertions(+), 80 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 19d351f3..7c6d740b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -94,5 +94,6 @@ services: - LANGFLOW_LOAD_FLOWS_PATH=/app/flows - LANGFLOW_SECRET_KEY=${LANGFLOW_SECRET_KEY} - JWT="dummy" + - GENDB-QUERY-FILTER="{}" - LANGFLOW_VARIABLES_TO_GET_FROM_ENVIRONMENT=JWT - LANGFLOW_LOG_LEVEL=DEBUG diff --git a/flows/gendb_agent.json b/flows/gendb_agent.json index 95936bda..302054bc 100644 --- a/flows/gendb_agent.json +++ b/flows/gendb_agent.json @@ -34,28 +34,56 @@ "className": "", "data": { "sourceHandle": { - "dataType": "OpenSearch", - "id": "OpenSearch-iYfjf", - "name": "component_as_tool", + "dataType": "ChatInput", + "id": "ChatInput-bqH7H", + "name": "message", "output_types": [ - "Tool" + "Message" ] }, "targetHandle": { - "fieldName": "tools", + "fieldName": "input_value", "id": "Agent-crjWf", "inputTypes": [ - "Tool" + "Message" ], - "type": "other" + "type": "str" } }, - "id": "xy-edge__OpenSearch-iYfjf{œdataTypeœ:œOpenSearchœ,œidœ:œOpenSearch-iYfjfœ,œnameœ:œcomponent_as_toolœ,œoutput_typesœ:[œToolœ]}-Agent-crjWf{œfieldNameœ:œtoolsœ,œidœ:œAgent-crjWfœ,œinputTypesœ:[œToolœ],œtypeœ:œotherœ}", + "id": "xy-edge__ChatInput-bqH7H{œdataTypeœ:œChatInputœ,œidœ:œChatInput-bqH7Hœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-Agent-crjWf{œfieldNameœ:œinput_valueœ,œidœ:œAgent-crjWfœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", "selected": false, - "source": "OpenSearch-iYfjf", - "sourceHandle": "{œdataTypeœ:œOpenSearchœ,œidœ:œOpenSearch-iYfjfœ,œnameœ:œcomponent_as_toolœ,œoutput_typesœ:[œToolœ]}", + "source": "ChatInput-bqH7H", + "sourceHandle": "{œdataTypeœ:œChatInputœ,œidœ:œChatInput-bqH7Hœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}", "target": "Agent-crjWf", - "targetHandle": "{œfieldNameœ:œtoolsœ,œidœ:œAgent-crjWfœ,œinputTypesœ:[œToolœ],œtypeœ:œotherœ}" + "targetHandle": "{œfieldNameœ:œinput_valueœ,œidœ:œAgent-crjWfœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}" + }, + { + "animated": false, + "className": "", + "data": { + "sourceHandle": { + "dataType": "TextInput", + "id": "TextInput-aHsQb", + "name": "text", + "output_types": [ + "Message" + ] + }, + "targetHandle": { + "fieldName": "filter_expression", + "id": "OpenSearch-iYfjf", + "inputTypes": [ + "Message" + ], + "type": "str" + } + }, + "id": "xy-edge__TextInput-aHsQb{œdataTypeœ:œTextInputœ,œidœ:œTextInput-aHsQbœ,œnameœ:œtextœ,œoutput_typesœ:[œMessageœ]}-OpenSearch-iYfjf{œfieldNameœ:œfilter_expressionœ,œidœ:œOpenSearch-iYfjfœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", + "selected": false, + "source": "TextInput-aHsQb", + "sourceHandle": "{œdataTypeœ:œTextInputœ,œidœ:œTextInput-aHsQbœ,œnameœ:œtextœ,œoutput_typesœ:[œMessageœ]}", + "target": "OpenSearch-iYfjf", + "targetHandle": "{œfieldNameœ:œfilter_expressionœ,œidœ:œOpenSearch-iYfjfœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}" }, { "animated": false, @@ -88,32 +116,29 @@ "targetHandle": "{œfieldNameœ:œinput_valueœ,œidœ:œChatOutput-BMVN5œ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}" }, { - "animated": false, - "className": "", "data": { "sourceHandle": { - "dataType": "ChatInput", - "id": "ChatInput-bqH7H", - "name": "message", + "dataType": "OpenSearchHybrid", + "id": "OpenSearch-iYfjf", + "name": "component_as_tool", "output_types": [ - "Message" + "Tool" ] }, "targetHandle": { - "fieldName": "input_value", + "fieldName": "tools", "id": "Agent-crjWf", "inputTypes": [ - "Message" + "Tool" ], - "type": "str" + "type": "other" } }, - "id": "xy-edge__ChatInput-bqH7H{œdataTypeœ:œChatInputœ,œidœ:œChatInput-bqH7Hœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-Agent-crjWf{œfieldNameœ:œinput_valueœ,œidœ:œAgent-crjWfœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", - "selected": false, - "source": "ChatInput-bqH7H", - "sourceHandle": "{œdataTypeœ:œChatInputœ,œidœ:œChatInput-bqH7Hœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}", + "id": "xy-edge__OpenSearch-iYfjf{œdataTypeœ:œOpenSearchHybridœ,œidœ:œOpenSearch-iYfjfœ,œnameœ:œcomponent_as_toolœ,œoutput_typesœ:[œToolœ]}-Agent-crjWf{œfieldNameœ:œtoolsœ,œidœ:œAgent-crjWfœ,œinputTypesœ:[œToolœ],œtypeœ:œotherœ}", + "source": "OpenSearch-iYfjf", + "sourceHandle": "{œdataTypeœ:œOpenSearchHybridœ,œidœ:œOpenSearch-iYfjfœ,œnameœ:œcomponent_as_toolœ,œoutput_typesœ:[œToolœ]}", "target": "Agent-crjWf", - "targetHandle": "{œfieldNameœ:œinput_valueœ,œidœ:œAgent-crjWfœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}" + "targetHandle": "{œfieldNameœ:œtoolsœ,œidœ:œAgent-crjWfœ,œinputTypesœ:[œToolœ],œtypeœ:œotherœ}" } ], "nodes": [ @@ -754,8 +779,8 @@ "beta": false, "conditional_paths": [], "custom_fields": {}, - "description": "Use raw opensearch-py + JVector KNN", - "display_name": "OpenSearch", + "description": "Hybrid search: KNN + keyword, with optional filters, min_score, and aggregations.", + "display_name": "OpenSearch (Hybrid)", "documentation": "", "edited": true, "field_order": [ @@ -765,9 +790,9 @@ "search_query", "should_cache_vector_store", "embedding", - "use_jvector", "vector_field", "number_of_results", + "filter_expression", "auth_mode", "username", "password", @@ -775,12 +800,11 @@ "jwt_header", "bearer_prefix", "use_ssl", - "verify_certs", - "hybrid_search_query" + "verify_certs" ], "frozen": false, "icon": "OpenSearch", - "last_updated": "2025-08-12T02:45:51.915Z", + "last_updated": "2025-08-12T20:26:57.561Z", "legacy": false, "metadata": {}, "minimized": false, @@ -867,7 +891,7 @@ "show": true, "title_case": false, "type": "code", - "value": "from __future__ import annotations\n\nimport json\nfrom typing import Any, Dict\n\nfrom opensearchpy import OpenSearch, helpers\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.base.vectorstores.vector_store_connection_decorator import vector_store_connection\nfrom langflow.io import (\n BoolInput,\n HandleInput,\n IntInput,\n MultilineInput,\n SecretStrInput,\n StrInput,\n DropdownInput,\n)\nfrom langflow.schema.data import Data\n\n\n@vector_store_connection\nclass OpenSearchRawJVectorComponent(LCVectorStoreComponent):\n \"\"\"OpenSearch + JVector with Basic/JWT auth and dynamic UI.\"\"\"\n display_name: str = \"OpenSearch\"\n name: str = \"OpenSearch\"\n icon: str = \"OpenSearch\"\n description: str = \"Use raw opensearch-py + JVector KNN\"\n\n # Optional: keys we consider baseline in the form (useful if you later prune config)\n default_keys: list[str] = [\n \"opensearch_url\",\n \"index_name\",\n *[i.name for i in LCVectorStoreComponent.inputs],\n \"embedding\",\n \"use_jvector\",\n \"vector_field\",\n \"number_of_results\",\n \"auth_mode\",\n \"username\",\n \"password\",\n \"jwt_token\",\n \"jwt_header\",\n \"bearer_prefix\",\n \"use_ssl\",\n \"verify_certs\",\n \"hybrid_search_query\",\n ]\n\n inputs = [\n StrInput(\n name=\"opensearch_url\",\n display_name=\"OpenSearch URL\",\n value=\"http://localhost:9200\",\n info=\"URL for your OpenSearch cluster.\"\n ),\n StrInput(\n name=\"index_name\",\n display_name=\"Index Name\",\n value=\"langflow\",\n info=\"The index where vectors live.\"\n ),\n *LCVectorStoreComponent.inputs, # includes search_query, add_documents, etc.\n HandleInput(\n name=\"embedding\",\n display_name=\"Embedding\",\n input_types=[\"Embeddings\"]\n ),\n BoolInput(\n name=\"use_jvector\",\n display_name=\"Use JVector KNN Search\",\n value=True,\n info=\"Toggle raw JVector knn vs. fallback search.\"\n ),\n StrInput(\n name=\"vector_field\",\n display_name=\"Vector Field\",\n value=\"chunk_embedding\",\n advanced=True,\n info=\"Name of the JVector field in your index.\"\n ),\n IntInput(\n name=\"number_of_results\",\n display_name=\"Number of Results\",\n value=10,\n advanced=True,\n info=\"How many hits to return.\"\n ),\n\n # ----- Auth controls (dynamic) -----\n DropdownInput(\n name=\"auth_mode\",\n display_name=\"Auth Mode\",\n value=\"basic\",\n options=[\"basic\", \"jwt\"],\n info=\"Choose Basic (username/password) or JWT (Bearer token).\",\n real_time_refresh=True, # triggers update_build_config\n advanced=False,\n ),\n StrInput(\n name=\"username\",\n display_name=\"Username\",\n value=\"admin\",\n show=True, # visible when auth_mode == \"basic\"\n ),\n SecretStrInput(\n name=\"password\",\n display_name=\"Password\",\n value=\"admin\",\n show=True, # visible when auth_mode == \"basic\"\n ),\n SecretStrInput(\n name=\"jwt_token\",\n display_name=\"JWT Token\",\n value=\"\",\n show=False, # visible when auth_mode == \"jwt\"\n info=\"Paste a valid JWT (will be sent as a header).\",\n ),\n StrInput(\n name=\"jwt_header\",\n display_name=\"JWT Header Name\",\n value=\"Authorization\",\n show=False, # visible when auth_mode == \"jwt\"\n advanced=True,\n ),\n BoolInput(\n name=\"bearer_prefix\",\n display_name=\"Prefix 'Bearer '\",\n value=True,\n show=False, # visible when auth_mode == \"jwt\"\n advanced=True,\n ),\n\n # ----- TLS + hybrid (unchanged) -----\n BoolInput(\n name=\"use_ssl\",\n display_name=\"Use SSL\",\n value=True,\n advanced=True\n ),\n BoolInput(\n name=\"verify_certs\",\n display_name=\"Verify Certificates\",\n value=False,\n advanced=True\n ),\n MultilineInput(\n name=\"hybrid_search_query\",\n display_name=\"Hybrid Search Query\",\n value=\"\",\n advanced=True,\n info=\"Raw JSON for combining vector + keyword search.\"\n ),\n ]\n\n def _build_auth_kwargs(self) -> Dict[str, Any]:\n \"\"\"Compute auth-related kwargs for OpenSearch client.\"\"\"\n mode = (self.auth_mode or \"basic\").strip().lower()\n if mode == \"jwt\":\n token = (self.jwt_token or \"\").strip()\n if not token:\n raise ValueError(\"Auth Mode is 'jwt' but no jwt_token was provided.\")\n header_name = (self.jwt_header or \"Authorization\").strip()\n header_value = f\"Bearer {token}\" if self.bearer_prefix else token\n return {\"headers\": {header_name: header_value}}\n # default: basic\n user = (self.username or \"\").strip()\n pwd = (self.password or \"\").strip()\n if not user or not pwd:\n raise ValueError(\"Auth Mode is 'basic' but username/password are missing.\")\n return {\"http_auth\": (user, pwd)}\n\n def build_client(self) -> OpenSearch:\n auth_kwargs = self._build_auth_kwargs()\n return OpenSearch(\n hosts=[self.opensearch_url],\n use_ssl=self.use_ssl,\n verify_certs=self.verify_certs,\n ssl_assert_hostname=False,\n ssl_show_warn=False,\n **auth_kwargs, # Basic or JWT, mutually exclusive\n )\n\n @check_cached_vector_store\n def build_vector_store(self) -> OpenSearch:\n # We return the raw OpenSearch client as our “vector store.”\n return self.build_client()\n\n def _add_documents_to_vector_store(self, client: OpenSearch) -> None:\n docs = self._prepare_ingest_data() or []\n if not docs:\n self.log(\"No documents to ingest.\")\n return\n\n # Embed all docs in batch\n texts = [d.to_lc_document().page_content for d in docs]\n vectors = self.embedding.embed_documents(texts)\n\n actions = []\n for doc_obj, vec in zip(docs, vectors):\n lc_doc = doc_obj.to_lc_document()\n body = {\n **lc_doc.metadata,\n \"text\": lc_doc.page_content,\n self.vector_field: vec,\n }\n actions.append({\n \"_op_type\": \"index\",\n \"_index\": self.index_name,\n \"_source\": body,\n })\n\n self.log(f\"Indexing {len(actions)} docs into '{self.index_name}'…\")\n helpers.bulk(client, actions)\n\n def search(self, query: str | None = None) -> list[dict[str, Any]]:\n client = self.build_client()\n q = (query or \"\").strip()\n size = self.number_of_results\n\n # 1) Raw JVector KNN\n if self.use_jvector:\n vec = self.embedding.embed_query(q)\n body = {\n \"query\": {\n \"knn\": {\n self.vector_field: {\n \"vector\": vec,\n \"k\": size\n }\n }\n },\n \"_source\": [\"*\"],\n \"size\": size,\n }\n self.log(f\"Running JVector KNN on '{self.vector_field}' (k={size})\")\n resp = client.search(index=self.index_name, body=body)\n hits = resp.get(\"hits\", {}).get(\"hits\", [])\n return [\n {\n \"page_content\": hit[\"_source\"].get(\"text\", \"\"),\n \"metadata\": {k: v for k, v in hit[\"_source\"].items() if k != \"text\"},\n \"score\": hit.get(\"_score\"),\n }\n for hit in hits\n ]\n\n # 2) Hybrid JSON path\n if self.hybrid_search_query.strip():\n try:\n hybrid = json.loads(self.hybrid_search_query)\n except json.JSONDecodeError as e:\n raise ValueError(f\"Invalid hybrid JSON: {e}\") from e\n resp = client.search(index=self.index_name, body=hybrid)\n hits = resp.get(\"hits\", {}).get(\"hits\", [])\n return [\n {\n \"page_content\": h[\"_source\"].get(\"text\", \"\"),\n \"metadata\": h[\"_source\"],\n }\n for h in hits\n ]\n\n # 3) Fallback: match_all\n resp = client.search(\n index=self.index_name,\n body={\"query\": {\"match_all\": {}}, \"size\": size}\n )\n hits = resp.get(\"hits\", {}).get(\"hits\", [])\n return [\n {\n \"page_content\": h[\"_source\"].get(\"text\", \"\"),\n \"metadata\": h[\"_source\"],\n }\n for h in hits\n ]\n\n def search_documents(self) -> list[Data]:\n try:\n raw = self.search(self.search_query or \"\")\n return [\n Data(file_path=hit[\"metadata\"].get(\"file_path\", \"\"), text=hit[\"page_content\"])\n for hit in raw\n ]\n except Exception as e:\n self.log(f\"search_documents error: {e}\")\n raise\n\n # -------- dynamic UI handling --------\n async def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None) -> dict:\n \"\"\"\n Dynamically toggle visibility/requirements for Basic vs JWT.\n Called by the UI whenever a real_time_refresh input changes.\n \"\"\"\n try:\n if field_name == \"auth_mode\":\n mode = (field_value or \"basic\").strip().lower()\n\n is_basic = mode == \"basic\"\n is_jwt = mode == \"jwt\"\n\n # toggle visibility\n build_config[\"username\"][\"show\"] = is_basic\n build_config[\"password\"][\"show\"] = is_basic\n\n build_config[\"jwt_token\"][\"show\"] = is_jwt\n build_config[\"jwt_header\"][\"show\"] = is_jwt\n build_config[\"bearer_prefix\"][\"show\"] = is_jwt\n\n # toggle required (so hidden fields don't block save/run)\n build_config[\"username\"][\"required\"] = is_basic\n build_config[\"password\"][\"required\"] = is_basic\n\n build_config[\"jwt_token\"][\"required\"] = is_jwt\n build_config[\"jwt_header\"][\"required\"] = is_jwt\n build_config[\"bearer_prefix\"][\"required\"] = False\n\n # optional: clear hidden sensitive fields\n if is_basic:\n # hide jwt: clear token to avoid accidental reuse\n build_config[\"jwt_token\"][\"value\"] = \"\"\n # else: keep username/password values when switching to jwt (less destructive UX)\n\n return build_config\n\n # no-op for other fields\n return build_config\n\n except Exception as e:\n # keep UX resilient—if something goes wrong, don't break the form\n self.log(f\"update_build_config error: {e}\")\n return build_config\n" + "value": "from __future__ import annotations\n\nimport json\nfrom typing import Any, Dict, List\n\nfrom opensearchpy import OpenSearch, helpers\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.base.vectorstores.vector_store_connection_decorator import vector_store_connection\nfrom langflow.io import (\n BoolInput,\n HandleInput,\n IntInput,\n MultilineInput,\n SecretStrInput,\n StrInput,\n DropdownInput,\n)\nfrom langflow.schema.data import Data\n\n\n@vector_store_connection\nclass OpenSearchHybridComponent(LCVectorStoreComponent):\n \"\"\"OpenSearch hybrid search: KNN (k=10, boost=0.7) + multi_match (boost=0.3) with optional filters & min_score.\"\"\"\n display_name: str = \"OpenSearch (Hybrid)\"\n name: str = \"OpenSearchHybrid\"\n icon: str = \"OpenSearch\"\n description: str = \"Hybrid search: KNN + keyword, with optional filters, min_score, and aggregations.\"\n\n # Keys we consider baseline\n default_keys: list[str] = [\n \"opensearch_url\",\n \"index_name\",\n *[i.name for i in LCVectorStoreComponent.inputs], # search_query, add_documents, etc.\n \"embedding\",\n \"vector_field\",\n \"number_of_results\",\n \"auth_mode\",\n \"username\",\n \"password\",\n \"jwt_token\",\n \"jwt_header\",\n \"bearer_prefix\",\n \"use_ssl\",\n \"verify_certs\",\n \"filter_expression\",\n ]\n\n inputs = [\n StrInput(\n name=\"opensearch_url\",\n display_name=\"OpenSearch URL\",\n value=\"http://localhost:9200\",\n info=\"URL for your OpenSearch cluster.\"\n ),\n StrInput(\n name=\"index_name\",\n display_name=\"Index Name\",\n value=\"langflow\",\n info=\"The index to search.\"\n ),\n *LCVectorStoreComponent.inputs, # includes search_query, add_documents, etc.\n HandleInput(\n name=\"embedding\",\n display_name=\"Embedding\",\n input_types=[\"Embeddings\"]\n ),\n StrInput(\n name=\"vector_field\",\n display_name=\"Vector Field\",\n value=\"chunk_embedding\",\n advanced=True,\n info=\"Vector field used for KNN.\"\n ),\n IntInput(\n name=\"number_of_results\",\n display_name=\"Default Size (limit)\",\n value=10,\n advanced=True,\n info=\"Default number of hits when no limit provided in filter_expression.\"\n ),\n MultilineInput(\n name=\"filter_expression\",\n display_name=\"Filter Expression (JSON)\",\n value=\"\",\n info=(\n \"Optional JSON to control filters/limit/score threshold.\\n\"\n \"Accepted shapes:\\n\"\n '1) {\"filter\": [ {\"term\": {\"filename\":\"foo\"}}, {\"terms\":{\"owner\":[\"u1\",\"u2\"]}} ], \"limit\": 10, \"score_threshold\": 1.6 }\\n'\n '2) Context-style maps: {\"data_sources\":[\"fileA\"], \"document_types\":[\"application/pdf\"], \"owners\":[\"123\"]}\\n'\n \"Placeholders with __IMPOSSIBLE_VALUE__ are ignored.\"\n )\n ),\n\n # ----- Auth controls (dynamic) -----\n DropdownInput(\n name=\"auth_mode\",\n display_name=\"Auth Mode\",\n value=\"basic\",\n options=[\"basic\", \"jwt\"],\n info=\"Choose Basic (username/password) or JWT (Bearer token).\",\n real_time_refresh=True,\n advanced=False,\n ),\n StrInput(\n name=\"username\",\n display_name=\"Username\",\n value=\"admin\",\n show=True,\n ),\n SecretStrInput(\n name=\"password\",\n display_name=\"Password\",\n value=\"admin\",\n show=True,\n ),\n SecretStrInput(\n name=\"jwt_token\",\n display_name=\"JWT Token\",\n value=\"\",\n show=False,\n info=\"Paste a valid JWT (sent as a header).\",\n ),\n StrInput(\n name=\"jwt_header\",\n display_name=\"JWT Header Name\",\n value=\"Authorization\",\n show=False,\n advanced=True,\n ),\n BoolInput(\n name=\"bearer_prefix\",\n display_name=\"Prefix 'Bearer '\",\n value=True,\n show=False,\n advanced=True,\n ),\n\n # ----- TLS -----\n BoolInput(\n name=\"use_ssl\",\n display_name=\"Use SSL\",\n value=True,\n advanced=True\n ),\n BoolInput(\n name=\"verify_certs\",\n display_name=\"Verify Certificates\",\n value=False,\n advanced=True\n ),\n ]\n\n # ---------- auth / client ----------\n def _build_auth_kwargs(self) -> Dict[str, Any]:\n mode = (self.auth_mode or \"basic\").strip().lower()\n if mode == \"jwt\":\n token = (self.jwt_token or \"\").strip()\n if not token:\n raise ValueError(\"Auth Mode is 'jwt' but no jwt_token was provided.\")\n header_name = (self.jwt_header or \"Authorization\").strip()\n header_value = f\"Bearer {token}\" if self.bearer_prefix else token\n return {\"headers\": {header_name: header_value}}\n user = (self.username or \"\").strip()\n pwd = (self.password or \"\").strip()\n if not user or not pwd:\n raise ValueError(\"Auth Mode is 'basic' but username/password are missing.\")\n return {\"http_auth\": (user, pwd)}\n\n def build_client(self) -> OpenSearch:\n auth_kwargs = self._build_auth_kwargs()\n return OpenSearch(\n hosts=[self.opensearch_url],\n use_ssl=self.use_ssl,\n verify_certs=self.verify_certs,\n ssl_assert_hostname=False,\n ssl_show_warn=False,\n **auth_kwargs,\n )\n\n @check_cached_vector_store\n def build_vector_store(self) -> OpenSearch:\n # Return raw OpenSearch client as our “vector store.”\n return self.build_client()\n\n # ---------- ingest ----------\n def _add_documents_to_vector_store(self, client: OpenSearch) -> None:\n docs = self._prepare_ingest_data() or []\n if not docs:\n self.log(\"No documents to ingest.\")\n return\n\n texts = [d.to_lc_document().page_content for d in docs]\n if not self.embedding:\n raise ValueError(\"Embedding handle is required to embed documents.\")\n vectors = self.embedding.embed_documents(texts)\n\n actions = []\n for doc_obj, vec in zip(docs, vectors):\n lc_doc = doc_obj.to_lc_document()\n body = {\n **lc_doc.metadata,\n \"text\": lc_doc.page_content,\n self.vector_field: vec,\n }\n actions.append({\n \"_op_type\": \"index\",\n \"_index\": self.index_name,\n \"_source\": body,\n })\n\n self.log(f\"Indexing {len(actions)} docs into '{self.index_name}'…\")\n helpers.bulk(client, actions)\n\n # ---------- helpers for filters ----------\n def _is_placeholder_term(self, term_obj: dict) -> bool:\n # term_obj like {\"filename\": \"__IMPOSSIBLE_VALUE__\"}\n return any(v == \"__IMPOSSIBLE_VALUE__\" for v in term_obj.values())\n\n def _coerce_filter_clauses(self, filter_obj: dict | None) -> List[dict]:\n \"\"\"\n Accepts either:\n A) {\"filter\":[ ...term/terms objects... ], \"limit\":..., \"score_threshold\":...}\n B) Context-style: {\"data_sources\":[...], \"document_types\":[...], \"owners\":[...]}\n Returns a list of OS filter clauses (term/terms), skipping placeholders and empty terms.\n \"\"\"\n if not filter_obj:\n return []\n\n # Case A: already an explicit list/dict under \"filter\"\n if \"filter\" in filter_obj:\n raw = filter_obj[\"filter\"]\n if isinstance(raw, dict):\n raw = [raw]\n clauses: List[dict] = []\n for f in (raw or []):\n if \"term\" in f and isinstance(f[\"term\"], dict) and not self._is_placeholder_term(f[\"term\"]):\n clauses.append(f)\n elif \"terms\" in f and isinstance(f[\"terms\"], dict):\n field, vals = next(iter(f[\"terms\"].items()))\n if isinstance(vals, list) and len(vals) > 0:\n clauses.append(f)\n return clauses\n\n # Case B: convert context-style maps into clauses\n field_mapping = {\"data_sources\": \"filename\", \"document_types\": \"mimetype\", \"owners\": \"owner\"}\n clauses: List[dict] = []\n for k, values in filter_obj.items():\n if not isinstance(values, list):\n continue\n field = field_mapping.get(k, k)\n if len(values) == 0:\n # Match-nothing placeholder (kept to mirror your tool semantics)\n clauses.append({\"term\": {field: \"__IMPOSSIBLE_VALUE__\"}})\n elif len(values) == 1:\n if values[0] != \"__IMPOSSIBLE_VALUE__\":\n clauses.append({\"term\": {field: values[0]}})\n else:\n clauses.append({\"terms\": {field: values}})\n return clauses\n\n # ---------- search (single hybrid path matching your tool) ----------\n def search(self, query: str | None = None) -> list[dict[str, Any]]:\n client = self.build_client()\n q = (query or \"\").strip()\n\n # Parse optional filter expression (can be either A or B shape; see _coerce_filter_clauses)\n filter_obj = None\n if getattr(self, \"filter_expression\", \"\") and self.filter_expression.strip():\n try:\n self.log(f\"DEBUG FILTER EXPRESSION {self.filter_expression}\")\n filter_obj = json.loads(self.filter_expression)\n except json.JSONDecodeError as e:\n raise ValueError(f\"Invalid filter_expression JSON: {e}\") from e\n\n if not self.embedding:\n raise ValueError(\"Embedding is required to run hybrid search (KNN + keyword).\")\n\n # Embed the query\n vec = self.embedding.embed_query(q)\n\n # Build filter clauses (accept both shapes)\n clauses = self._coerce_filter_clauses(filter_obj)\n\n # Respect the tool's limit/threshold defaults\n limit = (filter_obj or {}).get(\"limit\", self.number_of_results)\n score_threshold = (filter_obj or {}).get(\"score_threshold\", 0)\n\n # Build the same hybrid body as your SearchService\n body = {\n \"query\": {\n \"bool\": {\n \"should\": [\n {\n \"knn\": {\n self.vector_field: {\n \"vector\": vec,\n \"k\": 10, # fixed to match the tool\n \"boost\": 0.7\n }\n }\n },\n {\n \"multi_match\": {\n \"query\": q,\n \"fields\": [\"text^2\", \"filename^1.5\"],\n \"type\": \"best_fields\",\n \"fuzziness\": \"AUTO\",\n \"boost\": 0.3\n }\n }\n ],\n \"minimum_should_match\": 1\n }\n },\n \"aggs\": {\n \"data_sources\": {\"terms\": {\"field\": \"filename\", \"size\": 20}},\n \"document_types\": {\"terms\": {\"field\": \"mimetype\", \"size\": 10}},\n \"owners\": {\"terms\": {\"field\": \"owner\", \"size\": 10}}\n },\n \"_source\": [\n \"filename\", \"mimetype\", \"page\", \"text\", \"source_url\",\n \"owner\", \"allowed_users\", \"allowed_groups\"\n ],\n \"size\": limit\n }\n\n if clauses:\n body[\"query\"][\"bool\"][\"filter\"] = clauses\n\n if isinstance(score_threshold, (int, float)) and score_threshold > 0:\n # top-level min_score (matches your tool)\n body[\"min_score\"] = score_threshold\n\n self.log(f\"DEBUG HYBRID BODY {body}\")\n resp = client.search(index=self.index_name, body=body)\n hits = resp.get(\"hits\", {}).get(\"hits\", [])\n return [\n {\n \"page_content\": hit[\"_source\"].get(\"text\", \"\"),\n \"metadata\": {k: v for k, v in hit[\"_source\"].items() if k != \"text\"},\n \"score\": hit.get(\"_score\"),\n }\n for hit in hits\n ]\n\n def search_documents(self) -> list[Data]:\n try:\n raw = self.search(self.search_query or \"\")\n return [\n Data(file_path=hit[\"metadata\"].get(\"file_path\", \"\"), text=hit[\"page_content\"])\n for hit in raw\n ]\n except Exception as e:\n self.log(f\"search_documents error: {e}\")\n raise\n\n # -------- dynamic UI handling (auth switch) --------\n async def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None) -> dict:\n try:\n if field_name == \"auth_mode\":\n mode = (field_value or \"basic\").strip().lower()\n is_basic = mode == \"basic\"\n is_jwt = mode == \"jwt\"\n\n build_config[\"username\"][\"show\"] = is_basic\n build_config[\"password\"][\"show\"] = is_basic\n\n build_config[\"jwt_token\"][\"show\"] = is_jwt\n build_config[\"jwt_header\"][\"show\"] = is_jwt\n build_config[\"bearer_prefix\"][\"show\"] = is_jwt\n\n build_config[\"username\"][\"required\"] = is_basic\n build_config[\"password\"][\"required\"] = is_basic\n\n build_config[\"jwt_token\"][\"required\"] = is_jwt\n build_config[\"jwt_header\"][\"required\"] = is_jwt\n build_config[\"bearer_prefix\"][\"required\"] = False\n\n if is_basic:\n build_config[\"jwt_token\"][\"value\"] = \"\"\n\n return build_config\n\n return build_config\n\n except Exception as e:\n self.log(f\"update_build_config error: {e}\")\n return build_config\n" }, "embedding": { "_input_type": "HandleInput", @@ -889,13 +913,13 @@ "type": "other", "value": "" }, - "hybrid_search_query": { + "filter_expression": { "_input_type": "MultilineInput", - "advanced": true, + "advanced": false, "copy_field": false, - "display_name": "Hybrid Search Query", + "display_name": "Filter Expression (JSON)", "dynamic": false, - "info": "Raw JSON for combining vector + keyword search.", + "info": "Optional JSON to control filters/limit/score threshold.\nAccepted shapes:\n1) {\"filter\": [ {\"term\": {\"filename\":\"foo\"}}, {\"terms\":{\"owner\":[\"u1\",\"u2\"]}} ], \"limit\": 10, \"score_threshold\": 1.6 }\n2) Context-style maps: {\"data_sources\":[\"fileA\"], \"document_types\":[\"application/pdf\"], \"owners\":[\"123\"]}\nPlaceholders with __IMPOSSIBLE_VALUE__ are ignored.", "input_types": [ "Message" ], @@ -903,7 +927,7 @@ "list_add_label": "Add More", "load_from_db": false, "multiline": true, - "name": "hybrid_search_query", + "name": "filter_expression", "placeholder": "", "required": false, "show": true, @@ -919,7 +943,7 @@ "advanced": false, "display_name": "Index Name", "dynamic": false, - "info": "The index where vectors live.", + "info": "The index to search.", "list": false, "list_add_label": "Add More", "load_from_db": false, @@ -978,7 +1002,7 @@ "advanced": false, "display_name": "JWT Token", "dynamic": false, - "info": "Paste a valid JWT (will be sent as a header).", + "info": "Paste a valid JWT (sent as a header).", "input_types": [], "load_from_db": true, "name": "jwt_token", @@ -993,9 +1017,9 @@ "number_of_results": { "_input_type": "IntInput", "advanced": true, - "display_name": "Number of Results", + "display_name": "Default Size (limit)", "dynamic": false, - "info": "How many hits to return.", + "info": "Default number of hits when no limit provided in filter_expression.", "list": false, "list_add_label": "Add More", "load_from_db": false, @@ -1113,8 +1137,8 @@ "type": "string" } }, - "description": "Use raw opensearch-py + JVector KNN", - "display_description": "Use raw opensearch-py + JVector KNN", + "description": "Hybrid search: KNN + keyword, with optional filters, min_score, and aggregations.", + "display_description": "Hybrid search: KNN + keyword, with optional filters, min_score, and aggregations.", "display_name": "search_documents", "name": "search_documents", "readonly": false, @@ -1132,8 +1156,8 @@ "type": "string" } }, - "description": "Use raw opensearch-py + JVector KNN", - "display_description": "Use raw opensearch-py + JVector KNN", + "description": "Hybrid search: KNN + keyword, with optional filters, min_score, and aggregations.", + "display_description": "Hybrid search: KNN + keyword, with optional filters, min_score, and aggregations.", "display_name": "as_dataframe", "name": "as_dataframe", "readonly": false, @@ -1151,8 +1175,8 @@ "type": "string" } }, - "description": "Use raw opensearch-py + JVector KNN", - "display_description": "Use raw opensearch-py + JVector KNN", + "description": "Hybrid search: KNN + keyword, with optional filters, min_score, and aggregations.", + "display_description": "Hybrid search: KNN + keyword, with optional filters, min_score, and aggregations.", "display_name": "as_vector_store", "name": "as_vector_store", "readonly": false, @@ -1163,24 +1187,6 @@ } ] }, - "use_jvector": { - "_input_type": "BoolInput", - "advanced": false, - "display_name": "Use JVector KNN Search", - "dynamic": false, - "info": "Toggle raw JVector knn vs. fallback search.", - "list": false, - "list_add_label": "Add More", - "name": "use_jvector", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "bool", - "value": true - }, "use_ssl": { "_input_type": "BoolInput", "advanced": true, @@ -1223,7 +1229,7 @@ "advanced": true, "display_name": "Vector Field", "dynamic": false, - "info": "Name of the JVector field in your index.", + "info": "Vector field used for KNN.", "list": false, "list_add_label": "Add More", "load_from_db": false, @@ -1260,19 +1266,19 @@ }, "selected_output": "search_results", "showNode": true, - "type": "OpenSearch" + "type": "OpenSearchHybrid" }, "dragging": false, "id": "OpenSearch-iYfjf", "measured": { - "height": 695, + "height": 751, "width": 320 }, "position": { "x": 1202.1762389080463, "y": 112.65887163017715 }, - "selected": false, + "selected": true, "type": "genericNode" }, { @@ -1305,7 +1311,7 @@ "frozen": false, "icon": "binary", "key": "EmbeddingModel", - "last_updated": "2025-08-12T02:50:06.375Z", + "last_updated": "2025-08-12T19:55:29.859Z", "legacy": false, "lf_version": "1.5.0.post1", "metadata": {}, @@ -1566,10 +1572,10 @@ "width": 320 }, "position": { - "x": 692.1919187941104, - "y": 354.58450527794975 + "x": 727.4791597769406, + "y": 518.0820551650631 }, - "selected": true, + "selected": false, "type": "genericNode" }, { @@ -1610,8 +1616,9 @@ ], "frozen": false, "icon": "bot", - "last_updated": "2025-08-12T02:49:30.271Z", + "last_updated": "2025-08-12T19:55:29.860Z", "legacy": false, + "lf_version": "1.5.0.post1", "metadata": { "code_hash": "533aac5f6185", "module": "langflow.components.agents.agent.AgentComponent" @@ -2119,12 +2126,112 @@ }, "selected": false, "type": "genericNode" + }, + { + "data": { + "id": "TextInput-aHsQb", + "node": { + "base_classes": [ + "Message" + ], + "beta": false, + "conditional_paths": [], + "custom_fields": {}, + "description": "Get user text inputs.", + "display_name": "Text Input", + "documentation": "https://docs.langflow.org/components-io#text-input", + "edited": true, + "field_order": [ + "input_value" + ], + "frozen": false, + "icon": "type", + "legacy": false, + "lf_version": "1.5.0.post1", + "metadata": {}, + "minimized": false, + "output_types": [], + "outputs": [ + { + "allows_loop": false, + "cache": true, + "display_name": "Output Text", + "group_outputs": false, + "hidden": null, + "method": "text_response", + "name": "text", + "options": null, + "required_inputs": null, + "selected": "Message", + "tool_mode": true, + "types": [ + "Message" + ], + "value": "__UNDEFINED__" + } + ], + "pinned": false, + "template": { + "_type": "Component", + "code": { + "advanced": true, + "dynamic": true, + "fileTypes": [], + "file_path": "", + "info": "", + "list": false, + "load_from_db": false, + "multiline": true, + "name": "code", + "password": false, + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "type": "code", + "value": "from langflow.base.io.text import TextComponent\nfrom langflow.io import SecretStrInput, Output\nfrom langflow.schema.message import Message\n\n\nclass TextInputComponent(TextComponent):\n display_name = \"Text Input\"\n description = \"Get user text inputs.\"\n documentation: str = \"https://docs.langflow.org/components-io#text-input\"\n icon = \"type\"\n name = \"TextInput\"\n\n inputs = [\n SecretStrInput(\n name=\"input_value\",\n display_name=\"Text\",\n info=\"Text to be passed as input.\",\n ),\n ]\n outputs = [\n Output(display_name=\"Output Text\", name=\"text\", method=\"text_response\"),\n ]\n\n def text_response(self) -> Message:\n return Message(\n text=self.input_value,\n )\n" + }, + "input_value": { + "_input_type": "SecretStrInput", + "advanced": false, + "display_name": "Text", + "dynamic": false, + "info": "Text to be passed as input.", + "input_types": [], + "load_from_db": true, + "name": "input_value", + "password": true, + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "type": "str", + "value": "GENDB-QUERY-FILTER" + } + }, + "tool_mode": false + }, + "showNode": true, + "type": "TextInput" + }, + "dragging": false, + "id": "TextInput-aHsQb", + "measured": { + "height": 204, + "width": 320 + }, + "position": { + "x": 745.3341059713564, + "y": 95.0152511387621 + }, + "selected": false, + "type": "genericNode" } ], "viewport": { - "x": -467.6574511455983, - "y": -53.77870229496875, - "zoom": 0.850165645270968 + "x": -459.5606878274141, + "y": -37.34645580037716, + "zoom": 0.835091786312263 } }, "description": "GenDB Open Search Agent", diff --git a/frontend/src/app/page.tsx b/frontend/src/app/page.tsx index e2f2ad55..c37b0e4b 100644 --- a/frontend/src/app/page.tsx +++ b/frontend/src/app/page.tsx @@ -10,6 +10,7 @@ import { Checkbox } from "@/components/ui/checkbox" import { Collapsible, CollapsibleContent, CollapsibleTrigger } from "@/components/ui/collapsible" import { Search, Loader2, FileText, Zap, ChevronDown, ChevronUp, Filter, X, Settings, Save } from "lucide-react" import { ProtectedRoute } from "@/components/protected-route" +import { toast } from 'sonner' interface SearchResult { filename: string @@ -329,12 +330,12 @@ function SearchPage() { setContextTitle("") setContextDescription("") } - console.log(contextId ? "Context updated successfully:" : "Context saved successfully:", result) + toast.success(contextId ? "Context updated successfully" : "Context saved successfully") } else { - console.error(contextId ? "Failed to update context:" : "Failed to save context:", result.error) + toast.error(contextId ? "Failed to update context" : "Failed to save context") } } catch (error) { - console.error(contextId ? "Error updating context:" : "Error saving context:", error) + toast.error(contextId ? "Error updating context" : "Error saving context") } finally { setSavingContext(false) } diff --git a/src/services/chat_service.py b/src/services/chat_service.py index 00b0872b..48c6bd8d 100644 --- a/src/services/chat_service.py +++ b/src/services/chat_service.py @@ -1,6 +1,7 @@ from config.settings import clients, LANGFLOW_URL, FLOW_ID, LANGFLOW_KEY from agent import async_chat, async_langflow, async_chat_stream, async_langflow_stream from auth_context import set_auth_context +import json class ChatService: @@ -35,6 +36,50 @@ class ChatService: if jwt_token: extra_headers['X-LANGFLOW-GLOBAL-VAR-JWT'] = jwt_token + # Get context variables for filters, limit, and threshold + from auth_context import get_search_filters, get_search_limit, get_score_threshold + filters = get_search_filters() + limit = get_search_limit() + score_threshold = get_score_threshold() + + # Build the complete filter expression like the search service does + filter_expression = {} + if filters: + filter_clauses = [] + # Map frontend filter names to backend field names + field_mapping = { + "data_sources": "filename", + "document_types": "mimetype", + "owners": "owner" + } + + for filter_key, values in filters.items(): + if values is not None and isinstance(values, list) and len(values) > 0: + # Map frontend key to backend field name + field_name = field_mapping.get(filter_key, filter_key) + + if len(values) == 1: + # Single value filter + filter_clauses.append({"term": {field_name: values[0]}}) + else: + # Multiple values filter + filter_clauses.append({"terms": {field_name: values}}) + + if filter_clauses: + filter_expression["filter"] = filter_clauses + + # Add limit and score threshold to the filter expression (only if different from defaults) + if limit and limit != 10: # 10 is the default limit + filter_expression["limit"] = limit + + if score_threshold and score_threshold != 0: # 0 is the default threshold + filter_expression["score_threshold"] = score_threshold + + # Pass the complete filter expression as a single header to Langflow (only if we have something to send) + if filter_expression: + print(f"Sending GenDB query filter to Langflow: {json.dumps(filter_expression, indent=2)}") + extra_headers['X-LANGFLOW-GLOBAL-VAR-GENDB-QUERY-FILTER'] = json.dumps(filter_expression) + if stream: return async_langflow_stream(clients.langflow_client, FLOW_ID, prompt, extra_headers=extra_headers, previous_response_id=previous_response_id) else: