Merge branch 'main' into npm-deps

This commit is contained in:
Mike Fortman 2026-01-07 09:38:22 -06:00 committed by GitHub
commit d13ae8d9de
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 141 additions and 44 deletions

View file

@ -53,9 +53,12 @@ help:
@echo ""
# Development environments
# Prefer the centralized env file from the TUI: when ~/.openrag/tui/.env exists,
# hand it to docker compose through --env-file; otherwise this stays empty so
# the local .env fallback applies. Evaluated once at parse time (`:=`). The `~`
# is echoed literally and expanded later by the shell that runs each recipe.
OPENRAG_ENV_FILE := $(shell [ ! -f ~/.openrag/tui/.env ] || echo "--env-file ~/.openrag/tui/.env")
dev:
@echo "🚀 Starting OpenRAG with GPU support..."
docker compose -f docker-compose.yml -f docker-compose.gpu.yml up -d
docker compose $(OPENRAG_ENV_FILE) -f docker-compose.yml -f docker-compose.gpu.yml up -d
@echo "✅ Services started!"
@echo " Backend: http://localhost:8000"
@echo " Frontend: http://localhost:3000"
@ -65,7 +68,7 @@ dev:
dev-cpu:
@echo "🚀 Starting OpenRAG with CPU only..."
docker compose up -d
docker compose $(OPENRAG_ENV_FILE) up -d
@echo "✅ Services started!"
@echo " Backend: http://localhost:8000"
@echo " Frontend: http://localhost:3000"
@ -75,7 +78,7 @@ dev-cpu:
dev-local:
@echo "🔧 Starting infrastructure only (for local development)..."
docker compose up -d opensearch dashboards langflow
docker compose $(OPENRAG_ENV_FILE) up -d opensearch dashboards langflow
@echo "✅ Infrastructure started!"
@echo " Langflow: http://localhost:7860"
@echo " OpenSearch: http://localhost:9200"
@ -85,7 +88,7 @@ dev-local:
infra:
@echo "🔧 Starting infrastructure services only..."
docker compose up -d opensearch dashboards langflow
docker compose $(OPENRAG_ENV_FILE) up -d opensearch dashboards langflow
@echo "✅ Infrastructure services started!"
@echo " Langflow: http://localhost:7860"
@echo " OpenSearch: http://localhost:9200"
@ -93,7 +96,7 @@ infra:
infra-cpu:
@echo "🔧 Starting infrastructure services only..."
docker compose up -d opensearch dashboards langflow
docker compose $(OPENRAG_ENV_FILE) up -d opensearch dashboards langflow
@echo "✅ Infrastructure services started!"
@echo " Langflow: http://localhost:7860"
@echo " OpenSearch: http://localhost:9200"
@ -102,13 +105,13 @@ infra-cpu:
# Container management
stop:
@echo "🛑 Stopping all containers..."
docker compose down
docker compose $(OPENRAG_ENV_FILE) down
restart: stop dev
clean: stop
@echo "🧹 Cleaning up containers and volumes..."
docker compose down -v --remove-orphans
docker compose $(OPENRAG_ENV_FILE) down -v --remove-orphans
docker system prune -f
# Local development
@ -153,36 +156,36 @@ build-fe:
# Logging and debugging
logs:
@echo "📋 Showing all container logs..."
docker compose logs -f
docker compose $(OPENRAG_ENV_FILE) logs -f
logs-be:
@echo "📋 Showing backend logs..."
docker compose logs -f openrag-backend
docker compose $(OPENRAG_ENV_FILE) logs -f openrag-backend
logs-fe:
@echo "📋 Showing frontend logs..."
docker compose logs -f openrag-frontend
docker compose $(OPENRAG_ENV_FILE) logs -f openrag-frontend
logs-lf:
@echo "📋 Showing langflow logs..."
docker compose logs -f langflow
docker compose $(OPENRAG_ENV_FILE) logs -f langflow
logs-os:
@echo "📋 Showing opensearch logs..."
docker compose logs -f opensearch
docker compose $(OPENRAG_ENV_FILE) logs -f opensearch
# Shell access
shell-be:
@echo "🐚 Opening shell in backend container..."
docker compose exec openrag-backend /bin/bash
docker compose $(OPENRAG_ENV_FILE) exec openrag-backend /bin/bash
shell-lf:
@echo "🐚 Opening shell in langflow container..."
docker compose exec langflow /bin/bash
docker compose $(OPENRAG_ENV_FILE) exec langflow /bin/bash
shell-os:
@echo "🐚 Opening shell in opensearch container..."
docker compose exec opensearch /bin/bash
docker compose $(OPENRAG_ENV_FILE) exec opensearch /bin/bash
# Testing and quality
test:
@ -414,7 +417,7 @@ lint:
# Service status
status:
@echo "📊 Container status:"
@docker compose ps 2>/dev/null || echo "No containers running"
@docker compose $(OPENRAG_ENV_FILE) ps 2>/dev/null || echo "No containers running"
health:
@echo "🏥 Health check:"

View file

@ -39,7 +39,7 @@ If OpenRAG detects OAuth credentials during setup, it recommends **Advanced Setu
4. Optional: Under **Others**, edit the [knowledge base](/knowledge) paths if you don't want to use the default paths:
* **Documents Paths**: One or more paths to directories where OpenRAG looks for documents to ingest.
* **OpenSearch Data PAth**: Specify the path where you want OpenRAG to create your OpenSearch index.
* **OpenSearch Data Path**: Specify the path where you want OpenRAG to create your OpenSearch index.
5. Click **Save Configuration**.

View file

@ -139,7 +139,7 @@ The default value is 200 characters, which represents an overlap of 20 percent i
The default path for local uploads is `~/.openrag/documents`. This is mounted to the `/app/openrag-documents/` directory inside the OpenRAG container. Files added to the host or container directory are visible in both locations.
To change this location, modify the **Documents Paths** variable in either the [**Advanced Setup** menu](/install#setup) or in your [OpenRAG `.env` file](/reference/configuration).
To change this location, modify the **Documents Paths** variable in either the [**Basic/Advanced Setup** menu](/install#setup) or in your [OpenRAG `.env` file](/reference/configuration).
## Delete knowledge {#delete-knowledge}

View file

@ -114,7 +114,7 @@ The following variables are required or recommended:
PID: 27746
```
3. Deploy the OpenRAG containers locally using the appropriate Docker Compose configuration for your environment.
3. Deploy the OpenRAG containers locally using the appropriate Docker Compose configuration for your environment:
* **GPU-accelerated deployment**: If your host machine has an NVIDIA GPU with CUDA support and compatible NVIDIA drivers, use the base `docker-compose.yml` file with the `docker-compose.gpu.yml` override.

View file

@ -69,7 +69,7 @@ Control how OpenRAG [processes and ingests documents](/ingestion) into your know
| `DISABLE_INGEST_WITH_LANGFLOW` | `false` | Disable Langflow ingestion pipeline. |
| `DOCLING_OCR_ENGINE` | Set by OS | OCR engine for document processing. For macOS, `ocrmac`. For any other OS, `easyocr`. |
| `OCR_ENABLED` | `false` | Enable OCR for image processing. |
| `OPENRAG_DOCUMENTS_PATHS` | `~/.openrag/documents` | Document paths for ingestion. |
| `OPENRAG_DOCUMENTS_PATH` | `~/.openrag/documents` | The [local documents path](/knowledge#set-the-local-documents-path) for ingestion. |
| `PICTURE_DESCRIPTIONS_ENABLED` | `false` | Enable picture descriptions. |
## Langflow settings {#langflow-settings}

View file

@ -53,7 +53,20 @@ export function KnowledgeFilterList({
};
const parseQueryData = (queryData: string): ParsedQueryData => {
return JSON.parse(queryData) as ParsedQueryData;
const parsed = JSON.parse(queryData);
// Provide defaults for missing fields to handle API-created filters
return {
query: parsed.query ?? "",
filters: {
data_sources: parsed.filters?.data_sources ?? ["*"],
document_types: parsed.filters?.document_types ?? ["*"],
owners: parsed.filters?.owners ?? ["*"],
},
limit: parsed.limit ?? 10,
scoreThreshold: parsed.scoreThreshold ?? 0,
color: parsed.color ?? "zinc",
icon: parsed.icon ?? "filter",
};
};
return (

View file

@ -96,15 +96,16 @@ export function KnowledgeFilterPanel() {
setQuery(parsedFilterData.query || "");
// Set the actual filter selections from the saved knowledge filter
const filters = parsedFilterData.filters;
const filters = parsedFilterData.filters || {};
// Use the exact selections from the saved filter
// Empty arrays mean "none selected" not "all selected"
// Provide defaults for missing fields to handle API-created filters
const processedFilters = {
data_sources: filters.data_sources,
document_types: filters.document_types,
owners: filters.owners,
connector_types: filters.connector_types || ["*"],
data_sources: filters.data_sources ?? ["*"],
document_types: filters.document_types ?? ["*"],
owners: filters.owners ?? ["*"],
connector_types: filters.connector_types ?? ["*"],
};
console.log("[DEBUG] Loading filter selections:", processedFilters);
@ -114,8 +115,8 @@ export function KnowledgeFilterPanel() {
setScoreThreshold(parsedFilterData.scoreThreshold || 0);
setName(selectedFilter.name);
setDescription(selectedFilter.description || "");
setColor(parsedFilterData.color);
setIconKey(parsedFilterData.icon);
setColor(parsedFilterData.color ?? "zinc");
setIconKey(parsedFilterData.icon ?? "filter");
}
}, [selectedFilter, parsedFilterData]);
@ -123,13 +124,20 @@ export function KnowledgeFilterPanel() {
useEffect(() => {
if (createMode && parsedFilterData) {
setQuery(parsedFilterData.query || "");
setSelectedFilters(parsedFilterData.filters);
// Provide defaults for missing filter fields
const filters = parsedFilterData.filters || {};
setSelectedFilters({
data_sources: filters.data_sources ?? ["*"],
document_types: filters.document_types ?? ["*"],
owners: filters.owners ?? ["*"],
connector_types: filters.connector_types ?? ["*"],
});
setResultLimit(parsedFilterData.limit || 10);
setScoreThreshold(parsedFilterData.scoreThreshold || 0);
setName("");
setDescription("");
setColor(parsedFilterData.color);
setIconKey(parsedFilterData.icon);
setColor(parsedFilterData.color ?? "zinc");
setIconKey(parsedFilterData.icon ?? "filter");
}
}, [createMode, parsedFilterData]);

View file

@ -50,7 +50,10 @@ export function MultiSelect({
const [open, setOpen] = React.useState(false);
const [searchValue, setSearchValue] = React.useState("");
const isAllSelected = value.includes("*");
// Normalize value to empty array if undefined/null to prevent crashes
const safeValue = value ?? [];
const isAllSelected = safeValue.includes("*");
const filteredOptions = options.filter((option) =>
option.label.toLowerCase().includes(searchValue.toLowerCase()),
@ -66,12 +69,12 @@ export function MultiSelect({
}
} else {
let newValue: string[];
if (value.includes(optionValue)) {
if (safeValue.includes(optionValue)) {
// Remove the item
newValue = value.filter((v) => v !== optionValue && v !== "*");
newValue = safeValue.filter((v) => v !== optionValue && v !== "*");
} else {
// Add the item and remove "All" if present
newValue = [...value.filter((v) => v !== "*"), optionValue];
newValue = [...safeValue.filter((v) => v !== "*"), optionValue];
// Check max selection limit
if (maxSelection && newValue.length > maxSelection) {
@ -87,7 +90,7 @@ export function MultiSelect({
return allOptionLabel;
}
if (value.length === 0) {
if (safeValue.length === 0) {
return placeholder;
}
@ -96,7 +99,7 @@ export function MultiSelect({
.toLowerCase()
.replace("select ", "")
.replace("...", "");
return `${value.length} ${noun}`;
return `${safeValue.length} ${noun}`;
};
return (
@ -152,7 +155,7 @@ export function MultiSelect({
<Check
className={cn(
"mr-2 h-4 w-4",
value.includes(option.value)
safeValue.includes(option.value)
? "opacity-100"
: "opacity-0",
)}

View file

@ -84,7 +84,22 @@ export function KnowledgeFilterProvider({
if (filter) {
setCreateMode(false);
try {
const parsed = JSON.parse(filter.query_data) as ParsedQueryData;
const raw = JSON.parse(filter.query_data);
// Normalize parsed data with defaults for missing fields
// This handles filters created via API with incomplete queryData
const parsed: ParsedQueryData = {
query: raw.query ?? "",
filters: {
data_sources: raw.filters?.data_sources ?? ["*"],
document_types: raw.filters?.document_types ?? ["*"],
owners: raw.filters?.owners ?? ["*"],
connector_types: raw.filters?.connector_types ?? ["*"],
},
limit: raw.limit ?? 10,
scoreThreshold: raw.scoreThreshold ?? 0,
color: raw.color ?? "zinc",
icon: raw.icon ?? "filter",
};
setParsedFilterData(parsed);
// Auto-open panel when filter is selected

View file

@ -8,6 +8,42 @@ from utils.logging_config import get_logger
logger = get_logger(__name__)
def normalize_query_data(query_data: str | dict) -> str:
"""
Normalize query_data to ensure all required fields exist with defaults.
This prevents frontend crashes when API-created filters have incomplete data.
"""
# Parse if string
if isinstance(query_data, str):
try:
data = json.loads(query_data)
except json.JSONDecodeError:
data = {}
else:
data = query_data or {}
# Ensure filters object exists with all required fields
filters = data.get("filters") or {}
normalized_filters = {
"data_sources": filters.get("data_sources", ["*"]),
"document_types": filters.get("document_types", ["*"]),
"owners": filters.get("owners", ["*"]),
"connector_types": filters.get("connector_types", ["*"]),
}
# Build normalized query_data with defaults
normalized = {
"query": data.get("query", ""),
"filters": normalized_filters,
"limit": data.get("limit", 10),
"scoreThreshold": data.get("scoreThreshold", 0),
"color": data.get("color", "zinc"),
"icon": data.get("icon", "filter"),
}
return json.dumps(normalized)
async def create_knowledge_filter(
request: Request, knowledge_filter_service, session_manager
):
@ -25,6 +61,15 @@ async def create_knowledge_filter(
if not query_data:
return JSONResponse({"error": "Query data is required"}, status_code=400)
# Normalize query_data to ensure all required fields exist
try:
normalized_query_data = normalize_query_data(query_data)
except Exception as e:
logger.error(f"Failed to normalize query_data: {e}")
return JSONResponse(
{"error": f"Invalid queryData format: {str(e)}"}, status_code=400
)
user = request.state.user
jwt_token = session_manager.get_effective_jwt_token(user.user_id, request.state.jwt_token)
@ -34,7 +79,7 @@ async def create_knowledge_filter(
"id": filter_id,
"name": name,
"description": description,
"query_data": query_data, # Store the full search query JSON
"query_data": normalized_query_data, # Store normalized query JSON with defaults
"owner": user.user_id,
"allowed_users": payload.get("allowedUsers", []), # ACL field for future use
"allowed_groups": payload.get("allowedGroups", []), # ACL field for future use
@ -158,12 +203,22 @@ async def update_knowledge_filter(
{"error": "Failed to delete existing knowledge filter"}, status_code=500
)
# Normalize query_data if provided, otherwise use existing
query_data = payload.get("queryData", existing_filter["query_data"])
try:
normalized_query_data = normalize_query_data(query_data)
except Exception as e:
logger.error(f"Failed to normalize query_data: {e}")
return JSONResponse(
{"error": f"Invalid queryData format: {str(e)}"}, status_code=400
)
# Create updated knowledge filter document with same ID
updated_filter = {
"id": filter_id,
"name": payload.get("name", existing_filter["name"]),
"description": payload.get("description", existing_filter["description"]),
"query_data": payload.get("queryData", existing_filter["query_data"]),
"query_data": normalized_query_data,
"owner": existing_filter["owner"],
"allowed_users": payload.get(
"allowedUsers", existing_filter.get("allowed_users", [])

View file

@ -37,9 +37,9 @@ class DoclingManager:
self._starting = False
self._external_process = False
# PID file to track docling-serve across sessions (in current working directory)
from pathlib import Path
self._pid_file = Path.cwd() / ".docling.pid"
# PID file to track docling-serve across sessions (centralized in ~/.openrag/tui/)
from utils.paths import get_tui_dir
self._pid_file = get_tui_dir() / ".docling.pid"
# Log storage - simplified, no queue
self._log_buffer: List[str] = []