Merge branch 'main' into npm-deps

2026-01-07 09:38:22 -06:00 · 2026-01-07 09:38:22 -06:00 · d13ae8d9de
commit d13ae8d9de
parent 8b0e4a1a9c a197b77212
11 changed files with 141 additions and 44 deletions
--- a/35
+++ b/35
@ -53,9 +53,12 @@ help:
 	@echo ""

 # Development environments
+# Use the TUI's centralized env file when ~/.openrag/tui/.env exists; otherwise this expands to nothing and docker compose falls back to the local .env. Checked once, when the Makefile is parsed.
+OPENRAG_ENV_FILE := $(shell if [ -f ~/.openrag/tui/.env ]; then echo "--env-file ~/.openrag/tui/.env"; fi)
+
 dev:
 	@echo "🚀 Starting OpenRAG with GPU support..."
-	docker compose -f docker-compose.yml -f docker-compose.gpu.yml up -d
+	docker compose $(OPENRAG_ENV_FILE) -f docker-compose.yml -f docker-compose.gpu.yml up -d
 	@echo "✅ Services started!"
 	@echo "   Backend: http://localhost:8000"
 	@echo "   Frontend: http://localhost:3000"
@ -65,7 +68,7 @@ dev:

 dev-cpu:
 	@echo "🚀 Starting OpenRAG with CPU only..."
-	docker compose up -d
+	docker compose $(OPENRAG_ENV_FILE) up -d
 	@echo "✅ Services started!"
 	@echo "   Backend: http://localhost:8000"
 	@echo "   Frontend: http://localhost:3000"
@ -75,7 +78,7 @@ dev-cpu:

 dev-local:
 	@echo "🔧 Starting infrastructure only (for local development)..."
-	docker compose up -d opensearch dashboards langflow
+	docker compose $(OPENRAG_ENV_FILE) up -d opensearch dashboards langflow
 	@echo "✅ Infrastructure started!"
 	@echo "   Langflow: http://localhost:7860"
 	@echo "   OpenSearch: http://localhost:9200"
@ -85,7 +88,7 @@ dev-local:

 infra:
 	@echo "🔧 Starting infrastructure services only..."
-	docker compose up -d opensearch dashboards langflow
+	docker compose $(OPENRAG_ENV_FILE) up -d opensearch dashboards langflow
 	@echo "✅ Infrastructure services started!"
 	@echo "   Langflow: http://localhost:7860"
 	@echo "   OpenSearch: http://localhost:9200"
@ -93,7 +96,7 @@ infra:

 infra-cpu:
 	@echo "🔧 Starting infrastructure services only..."
-	docker compose up -d opensearch dashboards langflow
+	docker compose $(OPENRAG_ENV_FILE) up -d opensearch dashboards langflow
 	@echo "✅ Infrastructure services started!"
 	@echo "   Langflow: http://localhost:7860"
 	@echo "   OpenSearch: http://localhost:9200"
@ -102,13 +105,13 @@ infra-cpu:
 # Container management
 stop:
 	@echo "🛑 Stopping all containers..."
-	docker compose down
+	docker compose $(OPENRAG_ENV_FILE) down

 restart: stop dev

 clean: stop
 	@echo "🧹 Cleaning up containers and volumes..."
-	docker compose down -v --remove-orphans
+	docker compose $(OPENRAG_ENV_FILE) down -v --remove-orphans
 	docker system prune -f

 # Local development
@ -153,36 +156,36 @@ build-fe:
 # Logging and debugging
 logs:
 	@echo "📋 Showing all container logs..."
-	docker compose logs -f
+	docker compose $(OPENRAG_ENV_FILE) logs -f

 logs-be:
 	@echo "📋 Showing backend logs..."
-	docker compose logs -f openrag-backend
+	docker compose $(OPENRAG_ENV_FILE) logs -f openrag-backend

 logs-fe:
 	@echo "📋 Showing frontend logs..."
-	docker compose logs -f openrag-frontend
+	docker compose $(OPENRAG_ENV_FILE) logs -f openrag-frontend

 logs-lf:
 	@echo "📋 Showing langflow logs..."
-	docker compose logs -f langflow
+	docker compose $(OPENRAG_ENV_FILE) logs -f langflow

 logs-os:
 	@echo "📋 Showing opensearch logs..."
-	docker compose logs -f opensearch
+	docker compose $(OPENRAG_ENV_FILE) logs -f opensearch

 # Shell access
 shell-be:
 	@echo "🐚 Opening shell in backend container..."
-	docker compose exec openrag-backend /bin/bash
+	docker compose $(OPENRAG_ENV_FILE) exec openrag-backend /bin/bash

 shell-lf:
 	@echo "🐚 Opening shell in langflow container..."
-	docker compose exec langflow /bin/bash
+	docker compose $(OPENRAG_ENV_FILE) exec langflow /bin/bash

 shell-os:
 	@echo "🐚 Opening shell in opensearch container..."
-	docker compose exec opensearch /bin/bash
+	docker compose $(OPENRAG_ENV_FILE) exec opensearch /bin/bash

 # Testing and quality
 test:
@ -414,7 +417,7 @@ lint:
 # Service status
 status:
 	@echo "📊 Container status:"
-	@docker compose ps 2>/dev/null || echo "No containers running"
+	@docker compose $(OPENRAG_ENV_FILE) ps 2>/dev/null || echo "No containers running"

 health:
 	@echo "🏥 Health check:"
--- a/docs/docs/_partial-setup.mdx
+++ b/docs/docs/_partial-setup.mdx
@ -39,7 +39,7 @@ If OpenRAG detects OAuth credentials during setup, it recommends **Advanced Setu
 4. Optional: Under **Others**, edit the [knowledge base](/knowledge) paths if you don't want to use the default paths:

   * **Documents Paths**: One or more paths to directories are where OpenRAG looks for documents to ingest.
-   * **OpenSearch Data PAth**: Specify the path where you want OpenRAG to create your OpenSearch index.
+   * **OpenSearch Data Path**: Specify the path where you want OpenRAG to create your OpenSearch index.

 5. Click **Save Configuration**.

--- a/docs/docs/core-components/knowledge.mdx
+++ b/docs/docs/core-components/knowledge.mdx
@ -139,7 +139,7 @@ The default value is 200 characters, which represents an overlap of 20 percent i

 The default path for local uploads is `~/.openrag/documents`. This is mounted to the `/app/openrag-documents/` directory inside the OpenRAG container. Files added to the host or container directory are visible in both locations.

-To change this location, modify the **Documents Paths** variable in either the [**Advanced Setup** menu](/install#setup) or in your [OpenRAG `.env` file](/reference/configuration).
+To change this location, modify the **Documents Paths** variable in either the [**Basic/Advanced Setup** menu](/install#setup) or in your [OpenRAG `.env` file](/reference/configuration).

 ## Delete knowledge {#delete-knowledge}

--- a/docs/docs/get-started/docker.mdx
+++ b/docs/docs/get-started/docker.mdx
@ -114,7 +114,7 @@ The following variables are required or recommended:
   PID: 27746
   ```

-3. Deploy the OpenRAG containers locally using the appropriate Docker Compose configuration for your environment.
+3. Deploy the OpenRAG containers locally using the appropriate Docker Compose configuration for your environment:

   * **GPU-accelerated deployment**: If your host machine has an NVIDIA GPU with CUDA support and compatible NVIDIA drivers, use the base `docker-compose.yml` file with the `docker-compose.gpu.yml` override.

--- a/docs/docs/reference/configuration.mdx
+++ b/docs/docs/reference/configuration.mdx
@ -69,7 +69,7 @@ Control how OpenRAG [processes and ingests documents](/ingestion) into your know
 | `DISABLE_INGEST_WITH_LANGFLOW` | `false` | Disable Langflow ingestion pipeline. |
 | `DOCLING_OCR_ENGINE` | Set by OS | OCR engine for document processing. For macOS, `ocrmac`. For any other OS, `easyocr`. |
 | `OCR_ENABLED` | `false` | Enable OCR for image processing. |
-| `OPENRAG_DOCUMENTS_PATHS` | `~/.openrag/documents` | Document paths for ingestion. |
+| `OPENRAG_DOCUMENTS_PATH` | `~/.openrag/documents` | The [local documents path](/knowledge#set-the-local-documents-path) for ingestion. |
 | `PICTURE_DESCRIPTIONS_ENABLED` | `false` | Enable picture descriptions. |

 ## Langflow settings {#langflow-settings}
--- a/frontend/components/knowledge-filter-list.tsx
+++ b/frontend/components/knowledge-filter-list.tsx
@ -53,7 +53,20 @@ export function KnowledgeFilterList({
 	};

 	const parseQueryData = (queryData: string): ParsedQueryData => {
-		return JSON.parse(queryData) as ParsedQueryData;
+		const parsed = JSON.parse(queryData);
+		// Provide defaults for missing fields to handle API-created filters
+		return {
+			query: parsed.query ?? "",
+			filters: {
+				data_sources: parsed.filters?.data_sources ?? ["*"],
+				document_types: parsed.filters?.document_types ?? ["*"],
+				owners: parsed.filters?.owners ?? ["*"],
+			},
+			limit: parsed.limit ?? 10,
+			scoreThreshold: parsed.scoreThreshold ?? 0,
+			color: parsed.color ?? "zinc",
+			icon: parsed.icon ?? "filter",
+		};
 	};

 	return (
--- a/frontend/components/knowledge-filter-panel.tsx
+++ b/frontend/components/knowledge-filter-panel.tsx
@ -96,15 +96,16 @@ export function KnowledgeFilterPanel() {
      setQuery(parsedFilterData.query || "");

      // Set the actual filter selections from the saved knowledge filter
-      const filters = parsedFilterData.filters;
+      const filters = parsedFilterData.filters || {};

      // Use the exact selections from the saved filter
      // Empty arrays mean "none selected" not "all selected"
+      // Provide defaults for missing fields to handle API-created filters
      const processedFilters = {
-        data_sources: filters.data_sources,
-        document_types: filters.document_types,
-        owners: filters.owners,
-        connector_types: filters.connector_types || ["*"],
+        data_sources: filters.data_sources ?? ["*"],
+        document_types: filters.document_types ?? ["*"],
+        owners: filters.owners ?? ["*"],
+        connector_types: filters.connector_types ?? ["*"],
      };

      console.log("[DEBUG] Loading filter selections:", processedFilters);
@ -114,8 +115,8 @@ export function KnowledgeFilterPanel() {
      setScoreThreshold(parsedFilterData.scoreThreshold || 0);
      setName(selectedFilter.name);
      setDescription(selectedFilter.description || "");
-      setColor(parsedFilterData.color);
-      setIconKey(parsedFilterData.icon);
+      setColor(parsedFilterData.color ?? "zinc");
+      setIconKey(parsedFilterData.icon ?? "filter");
    }
  }, [selectedFilter, parsedFilterData]);

@ -123,13 +124,20 @@ export function KnowledgeFilterPanel() {
  useEffect(() => {
    if (createMode && parsedFilterData) {
      setQuery(parsedFilterData.query || "");
-      setSelectedFilters(parsedFilterData.filters);
+      // Provide defaults for missing filter fields
+      const filters = parsedFilterData.filters || {};
+      setSelectedFilters({
+        data_sources: filters.data_sources ?? ["*"],
+        document_types: filters.document_types ?? ["*"],
+        owners: filters.owners ?? ["*"],
+        connector_types: filters.connector_types ?? ["*"],
+      });
      setResultLimit(parsedFilterData.limit || 10);
      setScoreThreshold(parsedFilterData.scoreThreshold || 0);
      setName("");
      setDescription("");
-      setColor(parsedFilterData.color);
-      setIconKey(parsedFilterData.icon);
+      setColor(parsedFilterData.color ?? "zinc");
+      setIconKey(parsedFilterData.icon ?? "filter");
    }
  }, [createMode, parsedFilterData]);

--- a/frontend/components/ui/multi-select.tsx
+++ b/frontend/components/ui/multi-select.tsx
@ -50,7 +50,10 @@ export function MultiSelect({
  const [open, setOpen] = React.useState(false);
  const [searchValue, setSearchValue] = React.useState("");

-  const isAllSelected = value.includes("*");
+  // Normalize value to empty array if undefined/null to prevent crashes
+  const safeValue = value ?? [];
+
+  const isAllSelected = safeValue.includes("*");

  const filteredOptions = options.filter((option) =>
    option.label.toLowerCase().includes(searchValue.toLowerCase()),
@ -66,12 +69,12 @@ export function MultiSelect({
      }
    } else {
      let newValue: string[];
-      if (value.includes(optionValue)) {
+      if (safeValue.includes(optionValue)) {
        // Remove the item
-        newValue = value.filter((v) => v !== optionValue && v !== "*");
+        newValue = safeValue.filter((v) => v !== optionValue && v !== "*");
      } else {
        // Add the item and remove "All" if present
-        newValue = [...value.filter((v) => v !== "*"), optionValue];
+        newValue = [...safeValue.filter((v) => v !== "*"), optionValue];

        // Check max selection limit
        if (maxSelection && newValue.length > maxSelection) {
@ -87,7 +90,7 @@ export function MultiSelect({
      return allOptionLabel;
    }

-    if (value.length === 0) {
+    if (safeValue.length === 0) {
      return placeholder;
    }

@ -96,7 +99,7 @@ export function MultiSelect({
      .toLowerCase()
      .replace("select ", "")
      .replace("...", "");
-    return `${value.length} ${noun}`;
+    return `${safeValue.length} ${noun}`;
  };

  return (
@ -152,7 +155,7 @@ export function MultiSelect({
                  <Check
                    className={cn(
                      "mr-2 h-4 w-4",
-                      value.includes(option.value)
+                      safeValue.includes(option.value)
                        ? "opacity-100"
                        : "opacity-0",
                    )}
--- a/frontend/contexts/knowledge-filter-context.tsx
+++ b/frontend/contexts/knowledge-filter-context.tsx
@ -84,7 +84,22 @@ export function KnowledgeFilterProvider({
    if (filter) {
      setCreateMode(false);
      try {
-        const parsed = JSON.parse(filter.query_data) as ParsedQueryData;
+        const raw = JSON.parse(filter.query_data);
+        // Normalize parsed data with defaults for missing fields
+        // This handles filters created via API with incomplete queryData
+        const parsed: ParsedQueryData = {
+          query: raw.query ?? "",
+          filters: {
+            data_sources: raw.filters?.data_sources ?? ["*"],
+            document_types: raw.filters?.document_types ?? ["*"],
+            owners: raw.filters?.owners ?? ["*"],
+            connector_types: raw.filters?.connector_types ?? ["*"],
+          },
+          limit: raw.limit ?? 10,
+          scoreThreshold: raw.scoreThreshold ?? 0,
+          color: raw.color ?? "zinc",
+          icon: raw.icon ?? "filter",
+        };
        setParsedFilterData(parsed);

        // Auto-open panel when filter is selected
--- a/src/api/knowledge_filter.py
+++ b/src/api/knowledge_filter.py
@ -8,6 +8,42 @@ from utils.logging_config import get_logger
 logger = get_logger(__name__)


+def normalize_query_data(query_data: str | dict) -> str:
+    """
+    Normalize query_data to ensure all required fields exist with defaults.
+    This prevents frontend crashes when API-created filters have incomplete data.
+    Missing or null fields get defaults (empty lists are kept); unparseable or
+    empty input yields all defaults. Raises ValueError for non-object JSON.
+    """
+    data = query_data
+    if isinstance(data, str):
+        try:
+            data = json.loads(data)
+        except json.JSONDecodeError:
+            data = {}
+    data = data or {}
+    if not isinstance(data, dict):
+        raise ValueError("queryData must be a JSON object")
+    filters = data.get("filters") or {}
+    if not isinstance(filters, dict):
+        raise ValueError("queryData.filters must be a JSON object")
+
+    def pick(source: dict, key: str, default):
+        # An explicit null is treated like a missing key ([] is kept as-is)
+        return default if source.get(key) is None else source[key]
+
+    filter_keys = ("data_sources", "document_types", "owners", "connector_types")
+    normalized = {
+        "query": pick(data, "query", ""),
+        "filters": {key: pick(filters, key, ["*"]) for key in filter_keys},
+        "limit": pick(data, "limit", 10),
+        "scoreThreshold": pick(data, "scoreThreshold", 0),
+        "color": pick(data, "color", "zinc"),
+        "icon": pick(data, "icon", "filter"),
+    }
+    return json.dumps(normalized)
+
+
 async def create_knowledge_filter(
    request: Request, knowledge_filter_service, session_manager
 ):
@ -25,6 +61,15 @@ async def create_knowledge_filter(
    if not query_data:
        return JSONResponse({"error": "Query data is required"}, status_code=400)

+    # Normalize query_data to ensure all required fields exist
+    try:
+        normalized_query_data = normalize_query_data(query_data)
+    except Exception as e:
+        logger.error(f"Failed to normalize query_data: {e}")
+        return JSONResponse(
+            {"error": f"Invalid queryData format: {str(e)}"}, status_code=400
+        )
+
    user = request.state.user
    jwt_token = session_manager.get_effective_jwt_token(user.user_id, request.state.jwt_token)

@ -34,7 +79,7 @@ async def create_knowledge_filter(
        "id": filter_id,
        "name": name,
        "description": description,
-        "query_data": query_data,  # Store the full search query JSON
+        "query_data": normalized_query_data,  # Store normalized query JSON with defaults
        "owner": user.user_id,
        "allowed_users": payload.get("allowedUsers", []),  # ACL field for future use
        "allowed_groups": payload.get("allowedGroups", []),  # ACL field for future use
@ -158,12 +203,22 @@ async def update_knowledge_filter(
            {"error": "Failed to delete existing knowledge filter"}, status_code=500
        )

+    # Normalize query_data if provided, otherwise use existing
+    query_data = payload.get("queryData", existing_filter["query_data"])
+    try:
+        normalized_query_data = normalize_query_data(query_data)
+    except Exception as e:
+        logger.error(f"Failed to normalize query_data: {e}")
+        return JSONResponse(
+            {"error": f"Invalid queryData format: {str(e)}"}, status_code=400
+        )
+
    # Create updated knowledge filter document with same ID
    updated_filter = {
        "id": filter_id,
        "name": payload.get("name", existing_filter["name"]),
        "description": payload.get("description", existing_filter["description"]),
-        "query_data": payload.get("queryData", existing_filter["query_data"]),
+        "query_data": normalized_query_data,
        "owner": existing_filter["owner"],
        "allowed_users": payload.get(
            "allowedUsers", existing_filter.get("allowed_users", [])
--- a/src/tui/managers/docling_manager.py
+++ b/src/tui/managers/docling_manager.py
@ -37,9 +37,9 @@ class DoclingManager:
        self._starting = False
        self._external_process = False

-        # PID file to track docling-serve across sessions (in current working directory)
-        from pathlib import Path
-        self._pid_file = Path.cwd() / ".docling.pid"
+        # PID file to track docling-serve across sessions (centralized in ~/.openrag/tui/)
+        from utils.paths import get_tui_dir
+        self._pid_file = get_tui_dir() / ".docling.pid"

        # Log storage - simplified, no queue
        self._log_buffer: List[str] = []