From 55e1c498ede58ec901e71ee67665bc85ce8c42a0 Mon Sep 17 00:00:00 2001 From: Eric Hare Date: Mon, 8 Dec 2025 08:52:30 -0800 Subject: [PATCH 01/11] fix: Setup option inaccessible by available --- frontend/components/layout-wrapper.tsx | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/frontend/components/layout-wrapper.tsx b/frontend/components/layout-wrapper.tsx index 08eea73d..dbaf42da 100644 --- a/frontend/components/layout-wrapper.tsx +++ b/frontend/components/layout-wrapper.tsx @@ -15,6 +15,7 @@ import { } from "@/components/provider-health-banner"; import { TaskNotificationMenu } from "@/components/task-notification-menu"; import { useAuth } from "@/contexts/auth-context"; +import { useChat } from "@/contexts/chat-context"; import { useKnowledgeFilter } from "@/contexts/knowledge-filter-context"; import { useTask } from "@/contexts/task-context"; import { cn } from "@/lib/utils"; @@ -27,6 +28,7 @@ export function LayoutWrapper({ children }: { children: React.ReactNode }) { const { isMenuOpen } = useTask(); const { isPanelOpen } = useKnowledgeFilter(); const { isLoading, isAuthenticated, isNoAuthMode } = useAuth(); + const { isOnboardingComplete } = useChat(); // List of paths that should not show navigation const authPaths = ["/login", "/auth/callback"]; @@ -91,17 +93,17 @@ export function LayoutWrapper({ children }: { children: React.ReactNode }) { isOpen={isDoclingUnhealthy} className="w-full" > - + + + {settings?.edited && isOnboardingComplete && ( + + - {settings?.edited && ( - - - - )} + )} {children} From a467e8a9b6d35de0dd5d84354771ff7d9463af73 Mon Sep 17 00:00:00 2001 From: Eric Hare Date: Mon, 8 Dec 2025 09:09:05 -0800 Subject: [PATCH 02/11] fix: Support for txt file processing outside of Docling --- src/models/processors.py | 25 +++++++++-- src/services/document_service.py | 48 +++++++++++++------- src/utils/document_processing.py | 76 ++++++++++++++++++++++++++++++++ 3 files changed, 129 insertions(+), 20 deletions(-) diff --git a/src/models/processors.py b/src/models/processors.py index 9731adb7..8f84c3dc 100644 --- a/src/models/processors.py +++ b/src/models/processors.py @@ -197,10 +197,27 @@ class TaskProcessor: file_hash=file_hash, ) - # Convert and extract - result = clients.converter.convert(file_path) - full_doc = result.document.export_to_dict() - slim_doc = extract_relevant(full_doc) + # Check if this is a .txt file - use simple processing instead of docling + import os + file_ext = os.path.splitext(file_path)[1].lower() + + if file_ext == '.txt': + # Simple text file processing without docling + from utils.document_processing import process_text_file + logger.info( + "Processing as plain text file (bypassing docling)", + file_path=file_path, + file_hash=file_hash, + ) + slim_doc = process_text_file(file_path) + # Override filename with original_filename if provided + if original_filename: + slim_doc["filename"] = original_filename + else: + # Convert and extract using docling for other file types + result = clients.converter.convert(file_path) + full_doc = result.document.export_to_dict() + slim_doc = extract_relevant(full_doc) texts = [c["text"] for c in slim_doc["chunks"]] diff --git a/src/services/document_service.py b/src/services/document_service.py index de1b3cf6..f40c3d82 100644 --- a/src/services/document_service.py +++ b/src/services/document_service.py @@ -181,6 +181,7 @@ class DocumentService: async def process_upload_context(self, upload_file, filename: str = None): """Process uploaded file and return content for context""" import io + import os if not filename: filename = upload_file.filename or "uploaded_document" @@ -194,22 +195,37 @@ class DocumentService: content.write(chunk) content.seek(0) # Reset to beginning for reading - # Create DocumentStream and process with docling - doc_stream = DocumentStream(name=filename, stream=content) - result = clients.converter.convert(doc_stream) - full_doc = result.document.export_to_dict() - slim_doc = extract_relevant(full_doc) + # Check if this is a .txt file - use simple processing + file_ext = os.path.splitext(filename)[1].lower() + + if file_ext == '.txt': + # Simple text file processing for chat context + text_content = content.read().decode('utf-8', errors='replace') + + # For context, we don't need to chunk - just return the full content + return { + "filename": filename, + "content": text_content, + "pages": 1, # Text files don't have pages + "content_length": len(text_content), + } + else: + # Create DocumentStream and process with docling + doc_stream = DocumentStream(name=filename, stream=content) + result = clients.converter.convert(doc_stream) + full_doc = result.document.export_to_dict() + slim_doc = extract_relevant(full_doc) - # Extract all text content - all_text = [] - for chunk in slim_doc["chunks"]: - all_text.append(f"Page {chunk['page']}:\n{chunk['text']}") + # Extract all text content + all_text = [] + for chunk in slim_doc["chunks"]: + all_text.append(f"Page {chunk['page']}:\n{chunk['text']}") - full_content = "\n\n".join(all_text) + full_content = "\n\n".join(all_text) - return { - "filename": filename, - "content": full_content, - "pages": len(slim_doc["chunks"]), - "content_length": len(full_content), - } + return { + "filename": filename, + "content": full_content, + "pages": len(slim_doc["chunks"]), + "content_length": len(full_content), + } diff --git a/src/utils/document_processing.py b/src/utils/document_processing.py index fcb458fb..9619cf74 100644 --- a/src/utils/document_processing.py +++ b/src/utils/document_processing.py @@ -119,6 +119,82 @@ def get_worker_converter(): return _worker_converter +def process_text_file(file_path: str) -> dict: + """ + Process a plain text file without using docling. + Returns the same structure as extract_relevant() for consistency. + + Args: + file_path: Path to the .txt file + + Returns: + dict with keys: id, filename, mimetype, chunks + """ + import os + from utils.hash_utils import hash_id + + # Read the file + with open(file_path, 'r', encoding='utf-8', errors='replace') as f: + content = f.read() + + # Compute hash + file_hash = hash_id(file_path) + filename = os.path.basename(file_path) + + # Split content into chunks of ~1000 characters to match typical docling chunk sizes + # This ensures embeddings stay within reasonable token limits + chunk_size = 1000 + chunks = [] + + # Split by paragraphs first (double newline) + paragraphs = content.split('\n\n') + current_chunk = "" + chunk_index = 0 + + for para in paragraphs: + para = para.strip() + if not para: + continue + + # If adding this paragraph would exceed chunk size, save current chunk + if len(current_chunk) + len(para) + 2 > chunk_size and current_chunk: + chunks.append({ + "page": chunk_index + 1, # Use chunk_index + 1 as "page" number + "type": "text", + "text": current_chunk.strip() + }) + chunk_index += 1 + current_chunk = para + else: + if current_chunk: + current_chunk += "\n\n" + para + else: + current_chunk = para + + # Add the last chunk if any + if current_chunk.strip(): + chunks.append({ + "page": chunk_index + 1, + "type": "text", + "text": current_chunk.strip() + }) + + # If no chunks were created (empty file), create a single empty chunk + if not chunks: + chunks.append({ + "page": 1, + "type": "text", + "text": "" + }) + + return { + "id": file_hash, + "filename": filename, + "mimetype": "text/plain", + "chunks": chunks, + } + + def extract_relevant(doc_dict: dict) -> dict: """ Given the full export_to_dict() result: From 79cc531dc43c4090240aca85bdda6f6851382b2b Mon Sep 17 00:00:00 2001 From: Eric Hare Date: Mon, 8 Dec 2025 09:14:40 -0800 Subject: [PATCH 03/11] fix: Configurable ingestion timeout limits --- .env.example | 8 ++++++++ src/config/settings.py | 23 +++++++++++++++++++++-- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/.env.example b/.env.example index b4b1b88b..081c9026 100644 --- a/.env.example +++ b/.env.example @@ -2,6 +2,14 @@ # Set to true to disable Langflow ingestion and use traditional OpenRAG processor # If unset or false, Langflow pipeline will be used (default: upload -> ingest -> delete) DISABLE_INGEST_WITH_LANGFLOW=false + +# Langflow HTTP timeout configuration (in seconds) +# For large documents (300+ pages), ingestion can take 30+ minutes +# Increase these values if you experience timeouts with very large PDFs +# Default: 2400 seconds (40 minutes) total timeout, 30 seconds connection timeout +# LANGFLOW_TIMEOUT=2400 +# LANGFLOW_CONNECT_TIMEOUT=30 + # make one like so https://docs.langflow.org/api-keys-and-authentication#langflow-secret-key LANGFLOW_SECRET_KEY= diff --git a/src/config/settings.py b/src/config/settings.py index b590ab8b..f3e334b4 100644 --- a/src/config/settings.py +++ b/src/config/settings.py @@ -59,6 +59,12 @@ DISABLE_INGEST_WITH_LANGFLOW = os.getenv( "DISABLE_INGEST_WITH_LANGFLOW", "false" ).lower() in ("true", "1", "yes") +# Langflow HTTP timeout configuration (in seconds) +# For large documents (300+ pages), ingestion can take 30+ minutes +# Default: 40 minutes total, 40 minutes read timeout +LANGFLOW_TIMEOUT = float(os.getenv("LANGFLOW_TIMEOUT", "2400")) # 40 minutes +LANGFLOW_CONNECT_TIMEOUT = float(os.getenv("LANGFLOW_CONNECT_TIMEOUT", "30")) # 30 seconds + def is_no_auth_mode(): """Check if we're running in no-auth mode (OAuth credentials missing)""" @@ -317,9 +323,22 @@ class AppClients: # Initialize document converter self.converter = create_document_converter(ocr_engine=DOCLING_OCR_ENGINE) - # Initialize Langflow HTTP client + # Initialize Langflow HTTP client with extended timeouts for large documents + # Use explicit timeout configuration to handle large PDF ingestion (300+ pages) self.langflow_http_client = httpx.AsyncClient( - base_url=LANGFLOW_URL, timeout=1200.0 + base_url=LANGFLOW_URL, + timeout=httpx.Timeout( + timeout=LANGFLOW_TIMEOUT, # Total timeout + connect=LANGFLOW_CONNECT_TIMEOUT, # Connection timeout + read=LANGFLOW_TIMEOUT, # Read timeout (most important for large PDFs) + write=LANGFLOW_CONNECT_TIMEOUT, # Write timeout + pool=LANGFLOW_CONNECT_TIMEOUT, # Pool timeout + ) + ) + logger.info( + "Initialized Langflow HTTP client with extended timeouts", + timeout_seconds=LANGFLOW_TIMEOUT, + connect_timeout_seconds=LANGFLOW_CONNECT_TIMEOUT, ) return self From 9eaec94a2f3966ca26da1b3267b192e42a2d8bf6 Mon Sep 17 00:00:00 2001 From: Eric Hare Date: Mon, 8 Dec 2025 09:33:21 -0800 Subject: [PATCH 04/11] fix: Revise prompt to handle markdown uploads --- src/agent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/agent.py b/src/agent.py index dd092643..db332dc1 100644 --- a/src/agent.py +++ b/src/agent.py @@ -36,7 +36,7 @@ def get_conversation_thread(user_id: str, previous_response_id: str = None): "messages": [ { "role": "system", - "content": "You are the OpenRAG Agent. You answer questions using retrieval, reasoning, and tool use.\nYou have access to several tools. Your job is to determine **which tool to use and when**.\n### Available Tools\n- OpenSearch Retrieval Tool:\n Use this to search the indexed knowledge base. Use when the user asks about product details, internal concepts, processes, architecture, documentation, roadmaps, or anything that may be stored in the index.\n- Conversation History:\n Use this to maintain continuity when the user is referring to previous turns. \n Do not treat history as a factual source.\n- Conversation File Context:\n Use this when the user asks about a document they uploaded or refers directly to its contents.\n- URL Ingestion Tool:\n Use this **only** when the user explicitly asks you to read, summarize, or analyze the content of a URL.\n Do not ingest URLs automatically.\n- Calculator / Expression Evaluation Tool:\n Use this when the user asks to compare numbers, compute estimates, calculate totals, analyze pricing, or answer any question requiring mathematics or quantitative reasoning.\n If the answer requires arithmetic, call the calculator tool rather than calculating internally.\n### Retrieval Decision Rules\nUse OpenSearch **whenever**:\n1. The question may be answered from internal or indexed data.\n2. The user references team names, product names, release plans, configurations, requirements, or official information.\n3. The user needs a factual, grounded answer.\nDo **not** use retrieval if:\n- The question is purely creative (e.g., storytelling, analogies) or personal preference.\n- The user simply wants text reformatted or rewritten from what is already present in the conversation.\nWhen uncertain → **Retrieve.** Retrieval is low risk and improves grounding.\n### URL Ingestion Rules\nOnly ingest URLs when the user explicitly says:\n- \"Read this link\"\n- \"Summarize this webpage\"\n- \"What does this site say?\"\n- \"Ingest this URL\"\nIf unclear → ask a clarifying question.\n### Calculator Usage Rules\nUse the calculator when:\n- Performing arithmetic\n- Estimating totals\n- Comparing values\n- Modeling cost, time, effort, scale, or projections\nDo not perform math internally. **Call the calculator tool instead.**\n### Answer Construction Rules\n1. When asked: \"What is OpenRAG\", answer the following:\n\"OpenRAG is an open-source package for building agentic RAG systems. It supports integration with a wide range of orchestration tools, vector databases, and LLM providers. OpenRAG connects and amplifies three popular, proven open-source projects into one powerful platform:\n**Langflow** – Langflow is a powerful tool to build and deploy AI agents and MCP servers. [Read more](https://www.langflow.org/)\n**OpenSearch** – OpenSearch is an open source, search and observability suite that brings order to unstructured data at scale. [Read more](https://opensearch.org/)\n**Docling** – Docling simplifies document processing with advanced PDF understanding, OCR support, and seamless AI integrations. Parse PDFs, DOCX, PPTX, images & more. [Read more](https://www.docling.ai/)\"\n2. Synthesize retrieved or ingested content in your own words.\n3. Support factual claims with citations in the format:\n (Source: )\n4. If no supporting evidence is found:\n Say: \"No relevant supporting sources were found for that request.\"\n5. Never invent facts or hallucinate details.\n6. Be concise, direct, and confident. \n7. Do not reveal internal chain-of-thought.", + "content": "You are the OpenRAG Agent. You answer questions using retrieval, reasoning, and tool use.\nYou have access to several tools. Your job is to determine **which tool to use and when**.\n### Available Tools\n- OpenSearch Retrieval Tool:\n Use this to search the indexed knowledge base. Use when the user asks about product details, internal concepts, processes, architecture, documentation, roadmaps, or anything that may be stored in the index.\n- Conversation History:\n Use this to maintain continuity when the user is referring to previous turns. \n Do not treat history as a factual source.\n- Conversation File Context:\n Use this when the user asks about a document they uploaded or refers directly to its contents.\n **IMPORTANT**: If you receive confirmation that a file was uploaded (e.g., \"Confirm that you received this file\"), the file content is already available in the conversation context. Do NOT attempt to ingest it as a URL.\n Simply acknowledge the file and answer questions about it directly from the context.\n- URL Ingestion Tool:\n Use this **only** when the user explicitly asks you to read, summarize, or analyze the content of a web URL (http:// or https://).\n **Do NOT use this tool for filenames** (e.g., README.md, document.pdf, data.txt). These are file uploads, not URLs.\n Only use this tool for actual web addresses that the user explicitly provides.\n If unclear → ask a clarifying question.\n- Calculator / Expression Evaluation Tool:\n Use this when the user asks to compare numbers, compute estimates, calculate totals, analyze pricing, or answer any question requiring mathematics or quantitative reasoning.\n If the answer requires arithmetic, call the calculator tool rather than calculating internally.\n### Retrieval Decision Rules\nUse OpenSearch **whenever**:\n1. The question may be answered from internal or indexed data.\n2. The user references team names, product names, release plans, configurations, requirements, or official information.\n3. The user needs a factual, grounded answer.\nDo **not** use retrieval if:\n- The question is purely creative (e.g., storytelling, analogies) or personal preference.\n- The user simply wants text reformatted or rewritten from what is already present in the conversation.\nWhen uncertain → **Retrieve.** Retrieval is low risk and improves grounding.\n### File Upload vs URL Distinction\n**File uploads** (already in context):\n- Filenames like: README.md, document.pdf, notes.txt, data.csv\n- When you see file confirmation messages\n- Use conversation context directly - do NOT call URL tool\n**Web URLs** (need ingestion):\n- Start with http:// or https://\n- Examples: https://example.com, http://docs.site.org\n- User explicitly asks to fetch from web\n### Calculator Usage Rules\nUse the calculator when:\n- Performing arithmetic\n- Estimating totals\n- Comparing values\n- Modeling cost, time, effort, scale, or projections\nDo not perform math internally. **Call the calculator tool instead.**\n### Answer Construction Rules\n1. When asked: \"What is OpenRAG\", answer the following:\n\"OpenRAG is an open-source package for building agentic RAG systems. It supports integration with a wide range of orchestration tools, vector databases, and LLM providers. OpenRAG connects and amplifies three popular, proven open-source projects into one powerful platform:\n**Langflow** – Langflow is a powerful tool to build and deploy AI agents and MCP servers. [Read more](https://www.langflow.org/)\n**OpenSearch** – OpenSearch is an open source, search and observability suite that brings order to unstructured data at scale. [Read more](https://opensearch.org/)\n**Docling** – Docling simplifies document processing with advanced PDF understanding, OCR support, and seamless AI integrations. Parse PDFs, DOCX, PPTX, images & more. [Read more](https://www.docling.ai/)\"\n2. Synthesize retrieved or ingested content in your own words.\n3. Support factual claims with citations in the format:\n (Source: )\n4. If no supporting evidence is found:\n Say: \"No relevant supporting sources were found for that request.\"\n5. Never invent facts or hallucinate details.\n6. Be concise, direct, and confident. \n7. Do not reveal internal chain-of-thought.", } ], "previous_response_id": previous_response_id, # Parent response_id for branching From 7074de5ba74ac486e68c42416b810ecaf08c3f67 Mon Sep 17 00:00:00 2001 From: Eric Hare Date: Tue, 9 Dec 2025 10:07:21 -0800 Subject: [PATCH 05/11] fix: Tool calling for granite 3.3 --- frontend/hooks/useChatStreaming.ts | 72 ++++++++++++++++++++++++++++++ src/agent.py | 62 +++++++++++++++++++++++++ 2 files changed, 134 insertions(+) diff --git a/frontend/hooks/useChatStreaming.ts b/frontend/hooks/useChatStreaming.ts index c67a0ca6..89d0d810 100644 --- a/frontend/hooks/useChatStreaming.ts +++ b/frontend/hooks/useChatStreaming.ts @@ -162,6 +162,19 @@ export function useChatStreaming({ if (line.trim()) { try { const chunk = JSON.parse(line); + + // Investigation logging for Granite 3.3 8b tool call detection + const chunkKeys = Object.keys(chunk); + const toolRelatedKeys = chunkKeys.filter(key => + key.toLowerCase().includes('tool') || + key.toLowerCase().includes('call') || + key.toLowerCase().includes('retrieval') || + key.toLowerCase().includes('function') || + key.toLowerCase().includes('result') + ); + if (toolRelatedKeys.length > 0) { + console.log('[Tool Detection] Found tool-related keys:', toolRelatedKeys, chunk); + } // Extract response ID if present if (chunk.id) { @@ -449,6 +462,42 @@ export function useChatStreaming({ } } } + + // Heuristic detection for implicit tool calls (Granite 3.3 8b workaround) + // Check if chunk contains retrieval results without explicit tool call markers + const hasImplicitToolCall = ( + // Check for various result indicators in the chunk + (chunk.results && Array.isArray(chunk.results) && chunk.results.length > 0) || + (chunk.outputs && Array.isArray(chunk.outputs) && chunk.outputs.length > 0) || + // Check for retrieval-related fields + chunk.retrieved_documents || + chunk.retrieval_results || + // Check for nested data structures that might contain results + (chunk.data && typeof chunk.data === 'object' && ( + chunk.data.results || + chunk.data.retrieved_documents || + chunk.data.retrieval_results + )) + ); + + if (hasImplicitToolCall && currentFunctionCalls.length === 0) { + console.log('[Heuristic Detection] Detected implicit tool call:', chunk); + + // Create a synthetic function call for the UI + const results = chunk.results || chunk.outputs || chunk.retrieved_documents || + chunk.retrieval_results || chunk.data?.results || + chunk.data?.retrieved_documents || []; + + const syntheticFunctionCall: FunctionCall = { + name: "Retrieval", + arguments: { implicit: true, detected_heuristically: true }, + status: "completed", + type: "retrieval_call", + result: results, + }; + currentFunctionCalls.push(syntheticFunctionCall); + console.log('[Heuristic Detection] Created synthetic function call'); + } // Update streaming message in real-time if ( @@ -486,6 +535,29 @@ export function useChatStreaming({ "No response received from the server. Please try again.", ); } + + // Post-processing: Heuristic detection based on final content + // If no explicit tool calls detected but content shows RAG indicators + if (currentFunctionCalls.length === 0 && currentContent) { + // Check for citation patterns that indicate RAG usage + const hasCitations = /\(Source:|\[Source:|\bSource:|filename:|document:/i.test(currentContent); + // Check for common RAG response patterns + const hasRAGPattern = /based on.*(?:document|file|information|data)|according to.*(?:document|file)/i.test(currentContent); + + if (hasCitations || hasRAGPattern) { + console.log('[Post-Processing] Detected RAG usage from content patterns'); + const syntheticFunctionCall: FunctionCall = { + name: "Retrieval", + arguments: { + implicit: true, + detected_from: hasCitations ? "citations" : "content_patterns" + }, + status: "completed", + type: "retrieval_call", + }; + currentFunctionCalls.push(syntheticFunctionCall); + } + } // Finalize the message const finalMessage: Message = { diff --git a/src/agent.py b/src/agent.py index dd092643..278386e5 100644 --- a/src/agent.py +++ b/src/agent.py @@ -135,6 +135,7 @@ async def async_response_stream( full_response = "" chunk_count = 0 + detected_tool_call = False # Track if we've detected a tool call async for chunk in response: chunk_count += 1 logger.debug( @@ -158,6 +159,17 @@ async def async_response_stream( else: delta_text = str(chunk.delta) full_response += delta_text + + # Enhanced logging for tool call detection (Granite 3.3 8b investigation) + chunk_attrs = dir(chunk) if hasattr(chunk, '__dict__') else [] + tool_related_attrs = [attr for attr in chunk_attrs if 'tool' in attr.lower() or 'call' in attr.lower() or 'retrieval' in attr.lower()] + if tool_related_attrs: + logger.info( + "Tool-related attributes found in chunk", + chunk_count=chunk_count, + attributes=tool_related_attrs, + chunk_type=type(chunk).__name__ + ) # Send the raw event as JSON followed by newline for easy parsing try: @@ -169,7 +181,57 @@ async def async_response_stream( chunk_data = chunk.__dict__ else: chunk_data = str(chunk) + + # Log detailed chunk structure for investigation (especially for Granite 3.3 8b) + if isinstance(chunk_data, dict): + # Check for any fields that might indicate tool usage + potential_tool_fields = { + k: v for k, v in chunk_data.items() + if any(keyword in str(k).lower() for keyword in ['tool', 'call', 'retrieval', 'function', 'result', 'output']) + } + if potential_tool_fields: + logger.info( + "Potential tool-related fields in chunk", + chunk_count=chunk_count, + fields=list(potential_tool_fields.keys()), + sample_data=str(potential_tool_fields)[:500] + ) + # Middleware: Detect implicit tool calls and inject standardized events + # This helps Granite 3.3 8b and other models that don't emit standard markers + if isinstance(chunk_data, dict) and not detected_tool_call: + # Check if this chunk contains retrieval results + has_results = any([ + 'results' in chunk_data and isinstance(chunk_data.get('results'), list), + 'outputs' in chunk_data and isinstance(chunk_data.get('outputs'), list), + 'retrieved_documents' in chunk_data, + 'retrieval_results' in chunk_data, + ]) + + if has_results: + logger.info( + "Detected implicit tool call in backend, injecting synthetic event", + chunk_fields=list(chunk_data.keys()) + ) + # Inject a synthetic tool call event before this chunk + synthetic_event = { + "type": "response.output_item.done", + "item": { + "type": "retrieval_call", + "id": f"synthetic_{chunk_count}", + "name": "Retrieval", + "tool_name": "Retrieval", + "status": "completed", + "inputs": {"implicit": True, "backend_detected": True}, + "results": chunk_data.get('results') or chunk_data.get('outputs') or + chunk_data.get('retrieved_documents') or + chunk_data.get('retrieval_results') or [] + } + } + # Send the synthetic event first + yield (json.dumps(synthetic_event, default=str) + "\n").encode("utf-8") + detected_tool_call = True # Mark that we've injected a tool call + yield (json.dumps(chunk_data, default=str) + "\n").encode("utf-8") except Exception as e: # Fallback to string representation From 2c4a67bcbbc25d35132698a2ef9f6279c0a59bcc Mon Sep 17 00:00:00 2001 From: Mike Fortman Date: Tue, 9 Dec 2025 14:37:13 -0600 Subject: [PATCH 06/11] setup secret detection --- .pre-commit-config.yaml | 7 + .secrets.baseline | 198 +++++++++++++++++++++++++ src/connectors/onedrive/connector.py | 2 +- src/connectors/sharepoint/connector.py | 2 +- src/tui/managers/env_manager.py | 21 +-- 5 files changed, 218 insertions(+), 12 deletions(-) create mode 100644 .pre-commit-config.yaml create mode 100644 .secrets.baseline diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..b6c7a6fc --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,7 @@ +repos: + - repo: https://github.com/Yelp/detect-secrets + rev: v1.5.0 + hooks: + - id: detect-secrets + args: ["--baseline", ".secrets.baseline", "--exclude-lines", "code_hash"] + diff --git a/.secrets.baseline b/.secrets.baseline new file mode 100644 index 00000000..d5de1c8c --- /dev/null +++ b/.secrets.baseline @@ -0,0 +1,198 @@ +{ + "version": "1.5.0", + "plugins_used": [ + { + "name": "ArtifactoryDetector" + }, + { + "name": "AWSKeyDetector" + }, + { + "name": "AzureStorageKeyDetector" + }, + { + "name": "Base64HighEntropyString", + "limit": 4.5 + }, + { + "name": "BasicAuthDetector" + }, + { + "name": "CloudantDetector" + }, + { + "name": "DiscordBotTokenDetector" + }, + { + "name": "GitHubTokenDetector" + }, + { + "name": "GitLabTokenDetector" + }, + { + "name": "HexHighEntropyString", + "limit": 3.0 + }, + { + "name": "IbmCloudIamDetector" + }, + { + "name": "IbmCosHmacDetector" + }, + { + "name": "IPPublicDetector" + }, + { + "name": "JwtTokenDetector" + }, + { + "name": "KeywordDetector", + "keyword_exclude": "" + }, + { + "name": "MailchimpDetector" + }, + { + "name": "NpmDetector" + }, + { + "name": "OpenAIDetector" + }, + { + "name": "PrivateKeyDetector" + }, + { + "name": "PypiTokenDetector" + }, + { + "name": "SendGridDetector" + }, + { + "name": "SlackDetector" + }, + { + "name": "SoftlayerDetector" + }, + { + "name": "SquareOAuthDetector" + }, + { + "name": "StripeDetector" + }, + { + "name": "TelegramBotTokenDetector" + }, + { + "name": "TwilioKeyDetector" + } + ], + "filters_used": [ + { + "path": "detect_secrets.filters.allowlist.is_line_allowlisted" + }, + { + "path": "detect_secrets.filters.common.is_baseline_file", + "filename": ".secrets.baseline" + }, + { + "path": "detect_secrets.filters.common.is_ignored_due_to_verification_policies", + "min_level": 2 + }, + { + "path": "detect_secrets.filters.heuristic.is_indirect_reference" + }, + { + "path": "detect_secrets.filters.heuristic.is_likely_id_string" + }, + { + "path": "detect_secrets.filters.heuristic.is_lock_file" + }, + { + "path": "detect_secrets.filters.heuristic.is_not_alphanumeric_string" + }, + { + "path": "detect_secrets.filters.heuristic.is_potential_uuid" + }, + { + "path": "detect_secrets.filters.heuristic.is_prefixed_with_dollar_sign" + }, + { + "path": "detect_secrets.filters.heuristic.is_sequential_string" + }, + { + "path": "detect_secrets.filters.heuristic.is_swagger_file" + }, + { + "path": "detect_secrets.filters.heuristic.is_templated_secret" + }, + { + "path": "detect_secrets.filters.regex.should_exclude_file", + "pattern": [ + "flows/.*\\.json$" + ] + }, + { + "path": "detect_secrets.filters.regex.should_exclude_line", + "pattern": [ + "code_hash" + ] + } + ], + "results": { + "docs/docs/_partial-integrate-chat.mdx": [ + { + "type": "Secret Keyword", + "filename": "docs/docs/_partial-integrate-chat.mdx", + "hashed_secret": "e42fd8b9ad15d8fa5f4718cad7cf19b522807996", + "is_verified": false, + "line_number": 30 + } + ], + "src/connectors/onedrive/connector.py": [ + { + "type": "Secret Keyword", + "filename": "src/connectors/onedrive/connector.py", + "hashed_secret": "bf45445eaa4f57092a404fa8c7338d59d8c0ecef", + "is_verified": false, + "line_number": 18 + } + ], + "src/connectors/sharepoint/connector.py": [ + { + "type": "Secret Keyword", + "filename": "src/connectors/sharepoint/connector.py", + "hashed_secret": "bf45445eaa4f57092a404fa8c7338d59d8c0ecef", + "is_verified": false, + "line_number": 19 + } + ], + "src/main.py": [ + { + "type": "Base64 High Entropy String", + "filename": "src/main.py", + "hashed_secret": "131a83e9ef8660d7dd0771da7ce5954d9ea801ee", + "is_verified": false, + "line_number": 404 + } + ], + "src/models/processors.py": [ + { + "type": "Base64 High Entropy String", + "filename": "src/models/processors.py", + "hashed_secret": "131a83e9ef8660d7dd0771da7ce5954d9ea801ee", + "is_verified": false, + "line_number": 763 + } + ], + "src/services/langflow_file_service.py": [ + { + "type": "Base64 High Entropy String", + "filename": "src/services/langflow_file_service.py", + "hashed_secret": "131a83e9ef8660d7dd0771da7ce5954d9ea801ee", + "is_verified": false, + "line_number": 97 + } + ] + }, + "generated_at": "2025-12-09T20:24:08Z" +} diff --git a/src/connectors/onedrive/connector.py b/src/connectors/onedrive/connector.py index a88321d3..796e4310 100644 --- a/src/connectors/onedrive/connector.py +++ b/src/connectors/onedrive/connector.py @@ -15,7 +15,7 @@ class OneDriveConnector(BaseConnector): # Required BaseConnector class attributes CLIENT_ID_ENV_VAR = "MICROSOFT_GRAPH_OAUTH_CLIENT_ID" - CLIENT_SECRET_ENV_VAR = "MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET" + CLIENT_SECRET_ENV_VAR = "MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET" # pragma: allowlist secret # Connector metadata CONNECTOR_NAME = "OneDrive" diff --git a/src/connectors/sharepoint/connector.py b/src/connectors/sharepoint/connector.py index f84d3575..df6dc102 100644 --- a/src/connectors/sharepoint/connector.py +++ b/src/connectors/sharepoint/connector.py @@ -16,7 +16,7 @@ class SharePointConnector(BaseConnector): # Required BaseConnector class attributes CLIENT_ID_ENV_VAR = "MICROSOFT_GRAPH_OAUTH_CLIENT_ID" - CLIENT_SECRET_ENV_VAR = "MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET" + CLIENT_SECRET_ENV_VAR = "MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET" # pragma: allowlist secret # Connector metadata CONNECTOR_NAME = "SharePoint" diff --git a/src/tui/managers/env_manager.py b/src/tui/managers/env_manager.py index a3d3ea6f..51e2a11f 100644 --- a/src/tui/managers/env_manager.py +++ b/src/tui/managers/env_manager.py @@ -123,28 +123,29 @@ class EnvManager: import os # Map env vars to config attributes - attr_map = { - "OPENAI_API_KEY": "openai_api_key", - "ANTHROPIC_API_KEY": "anthropic_api_key", + # These are environment variable names, not actual secrets + attr_map = { # pragma: allowlist secret + "OPENAI_API_KEY": "openai_api_key", # pragma: allowlist secret + "ANTHROPIC_API_KEY": "anthropic_api_key", # pragma: allowlist secret "OLLAMA_ENDPOINT": "ollama_endpoint", - "WATSONX_API_KEY": "watsonx_api_key", + "WATSONX_API_KEY": "watsonx_api_key", # pragma: allowlist secret "WATSONX_ENDPOINT": "watsonx_endpoint", "WATSONX_PROJECT_ID": "watsonx_project_id", - "OPENSEARCH_PASSWORD": "opensearch_password", - "LANGFLOW_SECRET_KEY": "langflow_secret_key", + "OPENSEARCH_PASSWORD": "opensearch_password", # pragma: allowlist secret + "LANGFLOW_SECRET_KEY": "langflow_secret_key", # pragma: allowlist secret "LANGFLOW_SUPERUSER": "langflow_superuser", - "LANGFLOW_SUPERUSER_PASSWORD": "langflow_superuser_password", + "LANGFLOW_SUPERUSER_PASSWORD": "langflow_superuser_password", # pragma: allowlist secret "LANGFLOW_CHAT_FLOW_ID": "langflow_chat_flow_id", "LANGFLOW_INGEST_FLOW_ID": "langflow_ingest_flow_id", "LANGFLOW_URL_INGEST_FLOW_ID": "langflow_url_ingest_flow_id", "NUDGES_FLOW_ID": "nudges_flow_id", "GOOGLE_OAUTH_CLIENT_ID": "google_oauth_client_id", - "GOOGLE_OAUTH_CLIENT_SECRET": "google_oauth_client_secret", + "GOOGLE_OAUTH_CLIENT_SECRET": "google_oauth_client_secret", # pragma: allowlist secret "MICROSOFT_GRAPH_OAUTH_CLIENT_ID": "microsoft_graph_oauth_client_id", - "MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET": "microsoft_graph_oauth_client_secret", + "MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET": "microsoft_graph_oauth_client_secret", # pragma: allowlist secret "WEBHOOK_BASE_URL": "webhook_base_url", "AWS_ACCESS_KEY_ID": "aws_access_key_id", - "AWS_SECRET_ACCESS_KEY": "aws_secret_access_key", + "AWS_SECRET_ACCESS_KEY": "aws_secret_access_key", # pragma: allowlist secret "LANGFLOW_PUBLIC_URL": "langflow_public_url", "OPENRAG_DOCUMENTS_PATHS": "openrag_documents_paths", "OPENSEARCH_DATA_PATH": "opensearch_data_path", From 2dddbbdbee6fc7f47ffaead1943a795ba03571fa Mon Sep 17 00:00:00 2001 From: Mike Fortman Date: Tue, 9 Dec 2025 14:40:47 -0600 Subject: [PATCH 07/11] baseline --- .secrets.baseline | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/.secrets.baseline b/.secrets.baseline index d5de1c8c..28837d45 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -148,24 +148,6 @@ "line_number": 30 } ], - "src/connectors/onedrive/connector.py": [ - { - "type": "Secret Keyword", - "filename": "src/connectors/onedrive/connector.py", - "hashed_secret": "bf45445eaa4f57092a404fa8c7338d59d8c0ecef", - "is_verified": false, - "line_number": 18 - } - ], - "src/connectors/sharepoint/connector.py": [ - { - "type": "Secret Keyword", - "filename": "src/connectors/sharepoint/connector.py", - "hashed_secret": "bf45445eaa4f57092a404fa8c7338d59d8c0ecef", - "is_verified": false, - "line_number": 19 - } - ], "src/main.py": [ { "type": "Base64 High Entropy String", @@ -194,5 +176,5 @@ } ] }, - "generated_at": "2025-12-09T20:24:08Z" + "generated_at": "2025-12-09T20:33:13Z" } From e454b64f84367ef1c02df788402288c8eac1f8a7 Mon Sep 17 00:00:00 2001 From: Mike Fortman Date: Tue, 9 Dec 2025 15:10:21 -0600 Subject: [PATCH 08/11] setup dependabot --- .github/dependabot.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 .github/dependabot.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 00000000..3b871ae9 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,11 @@ +version: 2 + +updates: + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "monthly" + commit-message: + prefix: "build(deps):" + include: scope + From 2d0a20dc68cc8d09c6b1c25c64078d1e06183fc4 Mon Sep 17 00:00:00 2001 From: Mike Fortman Date: Tue, 9 Dec 2025 16:39:35 -0600 Subject: [PATCH 09/11] make flow names consistent --- flows/openrag_agent.json | 2 +- flows/openrag_nudges.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/flows/openrag_agent.json b/flows/openrag_agent.json index d9475aac..bb1adc71 100644 --- a/flows/openrag_agent.json +++ b/flows/openrag_agent.json @@ -4787,7 +4787,7 @@ "is_component": false, "locked": true, "last_tested_version": "1.7.0.dev21", - "name": "OpenRAG OpenSearch Agent", + "name": "OpenRAG OpenSearch Agent Flow", "tags": [ "assistants", "agents" diff --git a/flows/openrag_nudges.json b/flows/openrag_nudges.json index d9d79e60..475833f9 100644 --- a/flows/openrag_nudges.json +++ b/flows/openrag_nudges.json @@ -4114,7 +4114,7 @@ "is_component": false, "locked": true, "last_tested_version": "1.7.0.dev21", - "name": "OpenRAG OpenSearch Nudges", + "name": "OpenRAG OpenSearch Nudges Flow", "tags": [ "assistants", "agents" From a8d2e16bf48ab510b8793bcc789329dd34b93ad9 Mon Sep 17 00:00:00 2001 From: Mike Fortman Date: Wed, 10 Dec 2025 13:43:11 -0600 Subject: [PATCH 10/11] knip cleanup for ui --- frontend/components/discord-link.tsx | 47 - frontend/components/file-upload-area.tsx | 103 - frontend/components/github-star-button.tsx | 47 - .../components/knowledge-filter-dropdown.tsx | 458 -- frontend/components/login-required.tsx | 48 - frontend/components/mode-toggle.tsx | 23 - frontend/components/navigation-layout.tsx | 81 - frontend/components/ui/checkbox.tsx | 30 - frontend/components/ui/dot-pattern.tsx | 158 - .../components/ui/inputs/embedding-model.tsx | 67 - frontend/components/ui/navigation-menu.tsx | 128 - frontend/components/ui/radio-group.tsx | 44 - frontend/components/ui/separator.tsx | 31 - frontend/hooks/use-discord-members.ts | 56 - frontend/hooks/use-github-stars.ts | 52 - frontend/knip.config.ts | 18 + frontend/lib/format-count.ts | 13 - frontend/package-lock.json | 4388 ++--------------- frontend/package.json | 13 +- 19 files changed, 482 insertions(+), 5323 deletions(-) delete mode 100644 frontend/components/discord-link.tsx delete mode 100644 frontend/components/file-upload-area.tsx delete mode 100644 frontend/components/github-star-button.tsx delete mode 100644 frontend/components/knowledge-filter-dropdown.tsx delete mode 100644 frontend/components/login-required.tsx delete mode 100644 frontend/components/mode-toggle.tsx delete mode 100644 frontend/components/navigation-layout.tsx delete mode 100644 frontend/components/ui/checkbox.tsx delete mode 100644 frontend/components/ui/dot-pattern.tsx delete mode 100644 frontend/components/ui/inputs/embedding-model.tsx delete mode 100644 frontend/components/ui/navigation-menu.tsx delete mode 100644 frontend/components/ui/radio-group.tsx delete mode 100644 frontend/components/ui/separator.tsx delete mode 100644 frontend/hooks/use-discord-members.ts delete mode 100644 frontend/hooks/use-github-stars.ts create mode 100644 frontend/knip.config.ts delete mode 100644 frontend/lib/format-count.ts diff --git a/frontend/components/discord-link.tsx b/frontend/components/discord-link.tsx deleted file mode 100644 index 584daa26..00000000 --- a/frontend/components/discord-link.tsx +++ /dev/null @@ -1,47 +0,0 @@ -"use client"; - -import * as React from "react"; -import { cn } from "@/lib/utils"; -import { useDiscordMembers } from "@/hooks/use-discord-members"; -import { formatCount } from "@/lib/format-count"; - -interface DiscordLinkProps { - inviteCode?: string; - className?: string; -} - -const DiscordLink = React.forwardRef( - ({ inviteCode = "EqksyE2EX9", className }, ref) => { - const { data, isLoading, error } = useDiscordMembers(inviteCode); - - return ( - - - - - - {isLoading - ? "..." - : error - ? "--" - : data - ? formatCount(data.approximate_member_count) - : "--"} - - - ); - }, -); - -DiscordLink.displayName = "DiscordLink"; - -export { DiscordLink }; diff --git a/frontend/components/file-upload-area.tsx b/frontend/components/file-upload-area.tsx deleted file mode 100644 index 84f0806a..00000000 --- a/frontend/components/file-upload-area.tsx +++ /dev/null @@ -1,103 +0,0 @@ -"use client"; - -import * as React from "react"; -import { cn } from "@/lib/utils"; -import { Button } from "@/components/ui/button"; -import { Loader2 } from "lucide-react"; - -interface FileUploadAreaProps { - onFileSelected?: (file: File) => void; - isLoading?: boolean; - className?: string; -} - -const FileUploadArea = React.forwardRef( - ({ onFileSelected, isLoading = false, className }, ref) => { - const [isDragging, setIsDragging] = React.useState(false); - const fileInputRef = React.useRef(null); - - const handleDragOver = (e: React.DragEvent) => { - e.preventDefault(); - setIsDragging(true); - }; - - const handleDragLeave = (e: React.DragEvent) => { - e.preventDefault(); - setIsDragging(false); - }; - - const handleDrop = (e: React.DragEvent) => { - e.preventDefault(); - setIsDragging(false); - - const files = Array.from(e.dataTransfer.files); - if (files.length > 0 && onFileSelected) { - onFileSelected(files[0]); - } - }; - - const handleFileSelect = (e: React.ChangeEvent) => { - const files = Array.from(e.target.files || []); - if (files.length > 0 && onFileSelected) { - onFileSelected(files[0]); - } - }; - - const handleClick = () => { - if (!isLoading) { - fileInputRef.current?.click(); - } - }; - - return ( -
- - -
- {isLoading && ( -
- -
- )} - -
-

- {isLoading - ? "Processing file..." - : "Drop files here or click to upload"} -

-

- {isLoading - ? "Please wait while your file is being processed" - : ""} -

-
- - {!isLoading && } -
-
- ); - }, -); - -FileUploadArea.displayName = "FileUploadArea"; - -export { FileUploadArea }; diff --git a/frontend/components/github-star-button.tsx b/frontend/components/github-star-button.tsx deleted file mode 100644 index 81e4ca98..00000000 --- a/frontend/components/github-star-button.tsx +++ /dev/null @@ -1,47 +0,0 @@ -"use client"; - -import * as React from "react"; -import { cn } from "@/lib/utils"; -import { Github } from "lucide-react"; -import { useGitHubStars } from "@/hooks/use-github-stars"; -import { formatCount } from "@/lib/format-count"; - -interface GitHubStarButtonProps { - repo?: string; - className?: string; -} - -const GitHubStarButton = React.forwardRef< - HTMLAnchorElement, - GitHubStarButtonProps ->(({ repo = "phact/openrag", className }, ref) => { - const { data, isLoading, error } = useGitHubStars(repo); - - return ( - - - - {isLoading - ? "..." - : error - ? "--" - : data - ? formatCount(data.stargazers_count) - : "--"} - - - ); -}); - -GitHubStarButton.displayName = "GitHubStarButton"; - -export { GitHubStarButton }; diff --git a/frontend/components/knowledge-filter-dropdown.tsx b/frontend/components/knowledge-filter-dropdown.tsx deleted file mode 100644 index cb2106d9..00000000 --- a/frontend/components/knowledge-filter-dropdown.tsx +++ /dev/null @@ -1,458 +0,0 @@ -"use client"; - -import { useState, useEffect, useRef } from "react"; -import { Button } from "@/components/ui/button"; -import { Input } from "@/components/ui/input"; -import { Card, CardContent } from "@/components/ui/card"; - -import { Label } from "@/components/ui/label"; -import { Textarea } from "@/components/ui/textarea"; -import { - ChevronDown, - Filter, - Search, - X, - Loader2, - Plus, - Save, -} from "lucide-react"; -import { cn } from "@/lib/utils"; - -interface KnowledgeFilter { - id: string; - name: string; - description: string; - query_data: string; - owner: string; - created_at: string; - updated_at: string; -} - -interface ParsedQueryData { - query: string; - filters: { - data_sources: string[]; - document_types: string[]; - owners: string[]; - }; - limit: number; - scoreThreshold: number; -} - -interface KnowledgeFilterDropdownProps { - selectedFilter: KnowledgeFilter | null; - onFilterSelect: (filter: KnowledgeFilter | null) => void; -} - -export function KnowledgeFilterDropdown({ - selectedFilter, - onFilterSelect, -}: KnowledgeFilterDropdownProps) { - const [isOpen, setIsOpen] = useState(false); - const [filters, setFilters] = useState([]); - const [loading, setLoading] = useState(false); - const [searchQuery, setSearchQuery] = useState(""); - const [showCreateModal, setShowCreateModal] = useState(false); - const [createName, setCreateName] = useState(""); - const [createDescription, setCreateDescription] = useState(""); - const [creating, setCreating] = useState(false); - const dropdownRef = useRef(null); - - const loadFilters = async (query = "") => { - setLoading(true); - try { - const response = await fetch("/api/knowledge-filter/search", { - method: "POST", - headers: { - "Content-Type": "application/json", - }, - body: JSON.stringify({ - query, - limit: 20, // Limit for dropdown - }), - }); - - const result = await response.json(); - if (response.ok && result.success) { - setFilters(result.filters); - } else { - console.error("Failed to load knowledge filters:", result.error); - setFilters([]); - } - } catch (error) { - console.error("Error loading knowledge filters:", error); - setFilters([]); - } finally { - setLoading(false); - } - }; - - const deleteFilter = async (filterId: string, e: React.MouseEvent) => { - e.stopPropagation(); - - try { - const response = await fetch(`/api/knowledge-filter/${filterId}`, { - method: "DELETE", - }); - - if (response.ok) { - // Remove from local state - setFilters((prev) => prev.filter((f) => f.id !== filterId)); - - // If this was the selected filter, clear selection - if (selectedFilter?.id === filterId) { - onFilterSelect(null); - } - } else { - console.error("Failed to delete knowledge filter"); - } - } catch (error) { - console.error("Error deleting knowledge filter:", error); - } - }; - - const handleFilterSelect = (filter: KnowledgeFilter) => { - onFilterSelect(filter); - setIsOpen(false); - }; - - const handleClearFilter = () => { - onFilterSelect(null); - setIsOpen(false); - }; - - const handleCreateNew = () => { - setIsOpen(false); - setShowCreateModal(true); - }; - - const handleCreateFilter = async () => { - if (!createName.trim()) return; - - setCreating(true); - try { - // Create a basic filter with wildcards (match everything by default) - const defaultFilterData = { - query: "", - filters: { - data_sources: ["*"], - document_types: ["*"], - owners: ["*"], - }, - limit: 10, - scoreThreshold: 0, - }; - - const response = await fetch("/api/knowledge-filter", { - method: "POST", - headers: { - "Content-Type": "application/json", - }, - body: JSON.stringify({ - name: createName.trim(), - description: createDescription.trim(), - queryData: JSON.stringify(defaultFilterData), - }), - }); - - const result = await response.json(); - if (response.ok && result.success) { - // Create the new filter object - const newFilter: KnowledgeFilter = { - id: result.filter.id, - name: createName.trim(), - description: createDescription.trim(), - query_data: JSON.stringify(defaultFilterData), - owner: result.filter.owner, - created_at: result.filter.created_at, - updated_at: result.filter.updated_at, - }; - - // Add to local filters list - setFilters((prev) => [newFilter, ...prev]); - - // Select the new filter - onFilterSelect(newFilter); - - // Close modal and reset form - setShowCreateModal(false); - setCreateName(""); - setCreateDescription(""); - } else { - console.error("Failed to create knowledge filter:", result.error); - } - } catch (error) { - console.error("Error creating knowledge filter:", error); - } finally { - setCreating(false); - } - }; - - const handleCancelCreate = () => { - setShowCreateModal(false); - setCreateName(""); - setCreateDescription(""); - }; - - const getFilterSummary = (filter: KnowledgeFilter): string => { - try { - const parsed = JSON.parse(filter.query_data) as ParsedQueryData; - const parts = []; - - if (parsed.query) parts.push(`"${parsed.query}"`); - if (parsed.filters.data_sources.length > 0) - parts.push(`${parsed.filters.data_sources.length} sources`); - if (parsed.filters.document_types.length > 0) - parts.push(`${parsed.filters.document_types.length} types`); - if (parsed.filters.owners.length > 0) - parts.push(`${parsed.filters.owners.length} owners`); - - return parts.join(" • ") || "No filters"; - } catch { - return "Invalid filter"; - } - }; - - useEffect(() => { - if (isOpen) { - loadFilters(); - } - }, [isOpen]); - - useEffect(() => { - const timeoutId = setTimeout(() => { - if (isOpen) { - loadFilters(searchQuery); - } - }, 300); - - return () => clearTimeout(timeoutId); - }, [searchQuery, isOpen]); - - // Close dropdown when clicking outside - useEffect(() => { - const handleClickOutside = (event: MouseEvent) => { - if ( - dropdownRef.current && - !dropdownRef.current.contains(event.target as Node) - ) { - setIsOpen(false); - } - }; - - document.addEventListener("mousedown", handleClickOutside); - return () => document.removeEventListener("mousedown", handleClickOutside); - }, []); - - return ( -
- - - {isOpen && ( - - - {/* Search Header */} -
-
- - setSearchQuery(e.target.value)} - className="pl-9 h-8 text-sm" - /> -
-
- - {/* Filter List */} -
- {/* Clear filter option */} -
-
- -
-
All Knowledge
-
- No filters applied -
-
-
-
- - {loading ? ( -
- - - Loading... - -
- ) : filters.length === 0 ? ( -
- {searchQuery ? "No filters found" : "No saved filters"} -
- ) : ( - filters.map((filter) => ( -
handleFilterSelect(filter)} - className={cn( - "flex items-center gap-3 p-3 hover:bg-accent hover:text-accent-foreground cursor-pointer group transition-colors", - selectedFilter?.id === filter.id && - "bg-accent text-accent-foreground", - )} - > -
- -
-
- {filter.name} -
-
- {getFilterSummary(filter)} -
-
-
- -
- )) - )} -
- - {/* Create New Filter Option */} -
-
- -
-
- Create New Filter -
-
- Save current search as filter -
-
-
-
- - {/* Selected Filter Details */} - {selectedFilter && ( -
-
- Selected: {selectedFilter.name} -
- {selectedFilter.description && ( -
- {selectedFilter.description} -
- )} -
- )} -
-
- )} - - {/* Create Filter Modal */} - {showCreateModal && ( -
-
-

- Create New Knowledge Filter -

- -
-
- - setCreateName(e.target.value)} - className="mt-1" - /> -
- -
- -