Merge remote-tracking branch 'origin/main' into fix/onboarding_store

This commit is contained in:
Lucas Oliveira 2026-01-08 17:37:56 -03:00
commit bd245f97a0
14 changed files with 274 additions and 68 deletions

66
.github/workflows/codeql.yml vendored Normal file
View file

@ -0,0 +1,66 @@
# CodeQL code-scanning workflow.
# Analyzes each language in the matrix on pushes and pull requests that target
# main, and on a weekly schedule.
name: "CodeQL"

on:
  push:
    branches: [ 'main' ]
  pull_request:
    # The branches below must be a subset of the branches above
    branches: [ 'main' ]
  schedule:
    # Weekly: 02:17 UTC every Monday
    - cron: '17 2 * * 1'

jobs:
  analyze:
    name: Analyze
    # The 'swift' conditions below only take effect if 'swift' is added to the
    # language matrix; with the current matrix every job runs on ubuntu-latest
    # with a 360-minute timeout.
    runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }}
    timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }}
    permissions:
      actions: read
      contents: read
      # Required so the analyze step can upload code-scanning results
      security-events: write

    strategy:
      # Let the other language finish even if one analysis fails
      fail-fast: false
      matrix:
        language: [ 'python', 'javascript' ]
        # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
        # Use only 'java' to analyze code written in Java, Kotlin or both
        # Use only 'javascript' to analyze code written in JavaScript, TypeScript or both
        # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support

    steps:
      - name: Checkout repository
        uses: actions/checkout@v6

      # Initializes the CodeQL tools for scanning.
      - name: Initialize CodeQL
        uses: github/codeql-action/init@v3
        with:
          languages: ${{ matrix.language }}
          # If you wish to specify custom queries, you can do so here or in a config file.
          # By default, queries listed here will override any specified in a config file.
          # Prefix the list here with "+" to use these queries and those in the config file.
          # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
          # queries: security-extended,security-and-quality

      # Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift).
      # If this step fails, then you should remove it and run the build manually (see below)
      - name: Autobuild
        uses: github/codeql-action/autobuild@v3

      # Command-line programs to run using the OS shell.
      # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
      # If the Autobuild step above fails, remove it and uncomment the following three lines.
      # Modify them (or add more) to build your project.
      # - run: |
      #     echo "Run, Build Application using script"
      #     ./location_of_script_within_repo/buildscript.sh

      - name: Perform CodeQL Analysis
        uses: github/codeql-action/analyze@v3
        with:
          # One results category per matrix language
          category: "/language:${{matrix.language}}"

View file

@ -2,8 +2,11 @@ name: Publish Python SDK
on: on:
push: push:
tags: branches:
- 'sdk-py-v*' - main
paths:
- 'sdks/python/pyproject.toml'
workflow_dispatch:
jobs: jobs:
publish: publish:
@ -22,24 +25,35 @@ jobs:
with: with:
python-version: '3.12' python-version: '3.12'
- name: Install build tools - name: Install uv
run: pip install build twine uses: astral-sh/setup-uv@v4
- name: Extract version from tag - name: Extract version from pyproject.toml
id: version id: version
run: | run: |
VERSION=${GITHUB_REF_NAME#sdk-py-v} VERSION=$(grep -Po '(?<=^version = ")[^"]*' pyproject.toml)
echo "version=$VERSION" >> $GITHUB_OUTPUT echo "version=$VERSION" >> $GITHUB_OUTPUT
- name: Update version in pyproject.toml - name: Check if version already published
id: check
run: | run: |
sed -i "s/^version = .*/version = \"${{ steps.version.outputs.version }}\"/" pyproject.toml HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" https://pypi.org/pypi/openrag-sdk/${{ steps.version.outputs.version }}/json)
if [ "$HTTP_STATUS" = "200" ]; then
echo "exists=true" >> $GITHUB_OUTPUT
else
echo "exists=false" >> $GITHUB_OUTPUT
fi
- name: Build package - name: Build package
run: python -m build if: steps.check.outputs.exists == 'false'
run: uv build
- name: Publish to PyPI - name: Publish to PyPI
run: twine upload dist/* if: steps.check.outputs.exists == 'false'
run: uv publish
env: env:
TWINE_USERNAME: __token__ UV_PUBLISH_TOKEN: ${{ secrets.UV_PUBLISH_TOKEN }}
TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
- name: Skip publish (version exists)
if: steps.check.outputs.exists == 'true'
run: echo "Version ${{ steps.version.outputs.version }} already exists on PyPI, skipping publish"

View file

@ -2,13 +2,19 @@ name: Publish TypeScript SDK
on: on:
push: push:
tags: branches:
- 'sdk-ts-v*' - main
paths:
- 'sdks/typescript/package.json'
workflow_dispatch:
jobs: jobs:
publish: publish:
name: Publish to npm name: Publish to npm
runs-on: ubuntu-latest runs-on: ubuntu-latest
permissions:
contents: read
id-token: write
defaults: defaults:
run: run:
working-directory: sdks/typescript working-directory: sdks/typescript
@ -23,22 +29,36 @@ jobs:
node-version: '20' node-version: '20'
registry-url: 'https://registry.npmjs.org' registry-url: 'https://registry.npmjs.org'
- name: Update npm to latest
run: npm install -g npm@latest
- name: Extract version from package.json
id: version
run: |
VERSION=$(node -p "require('./package.json').version")
echo "version=$VERSION" >> $GITHUB_OUTPUT
- name: Check if version already published
id: check
run: |
if npm view openrag-sdk@${{ steps.version.outputs.version }} version 2>/dev/null; then
echo "exists=true" >> $GITHUB_OUTPUT
else
echo "exists=false" >> $GITHUB_OUTPUT
fi
- name: Install dependencies - name: Install dependencies
if: steps.check.outputs.exists == 'false'
run: npm ci run: npm ci
- name: Build - name: Build
if: steps.check.outputs.exists == 'false'
run: npm run build run: npm run build
- name: Extract version from tag
id: version
run: |
VERSION=${GITHUB_REF_NAME#sdk-ts-v}
echo "version=$VERSION" >> $GITHUB_OUTPUT
- name: Update package version
run: npm version ${{ steps.version.outputs.version }} --no-git-tag-version
- name: Publish to npm - name: Publish to npm
run: npm publish --access public if: steps.check.outputs.exists == 'false'
env: run: npm publish --access public --provenance
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
- name: Skip publish (version exists)
if: steps.check.outputs.exists == 'true'
run: echo "Version ${{ steps.version.outputs.version }} already exists on npm, skipping publish"

View file

@ -53,9 +53,12 @@ help:
@echo "" @echo ""
# Development environments # Development environments
# Use centralized env file from TUI if it exists, otherwise fall back to local .env
OPENRAG_ENV_FILE := $(shell if [ -f ~/.openrag/tui/.env ]; then echo "--env-file ~/.openrag/tui/.env"; fi)
dev: dev:
@echo "🚀 Starting OpenRAG with GPU support..." @echo "🚀 Starting OpenRAG with GPU support..."
docker compose -f docker-compose.yml -f docker-compose.gpu.yml up -d docker compose $(OPENRAG_ENV_FILE) -f docker-compose.yml -f docker-compose.gpu.yml up -d
@echo "✅ Services started!" @echo "✅ Services started!"
@echo " Backend: http://localhost:8000" @echo " Backend: http://localhost:8000"
@echo " Frontend: http://localhost:3000" @echo " Frontend: http://localhost:3000"
@ -65,7 +68,7 @@ dev:
dev-cpu: dev-cpu:
@echo "🚀 Starting OpenRAG with CPU only..." @echo "🚀 Starting OpenRAG with CPU only..."
docker compose up -d docker compose $(OPENRAG_ENV_FILE) up -d
@echo "✅ Services started!" @echo "✅ Services started!"
@echo " Backend: http://localhost:8000" @echo " Backend: http://localhost:8000"
@echo " Frontend: http://localhost:3000" @echo " Frontend: http://localhost:3000"
@ -75,7 +78,7 @@ dev-cpu:
dev-local: dev-local:
@echo "🔧 Starting infrastructure only (for local development)..." @echo "🔧 Starting infrastructure only (for local development)..."
docker compose up -d opensearch dashboards langflow docker compose $(OPENRAG_ENV_FILE) up -d opensearch dashboards langflow
@echo "✅ Infrastructure started!" @echo "✅ Infrastructure started!"
@echo " Langflow: http://localhost:7860" @echo " Langflow: http://localhost:7860"
@echo " OpenSearch: http://localhost:9200" @echo " OpenSearch: http://localhost:9200"
@ -85,7 +88,7 @@ dev-local:
infra: infra:
@echo "🔧 Starting infrastructure services only..." @echo "🔧 Starting infrastructure services only..."
docker compose up -d opensearch dashboards langflow docker compose $(OPENRAG_ENV_FILE) up -d opensearch dashboards langflow
@echo "✅ Infrastructure services started!" @echo "✅ Infrastructure services started!"
@echo " Langflow: http://localhost:7860" @echo " Langflow: http://localhost:7860"
@echo " OpenSearch: http://localhost:9200" @echo " OpenSearch: http://localhost:9200"
@ -93,7 +96,7 @@ infra:
infra-cpu: infra-cpu:
@echo "🔧 Starting infrastructure services only..." @echo "🔧 Starting infrastructure services only..."
docker compose up -d opensearch dashboards langflow docker compose $(OPENRAG_ENV_FILE) up -d opensearch dashboards langflow
@echo "✅ Infrastructure services started!" @echo "✅ Infrastructure services started!"
@echo " Langflow: http://localhost:7860" @echo " Langflow: http://localhost:7860"
@echo " OpenSearch: http://localhost:9200" @echo " OpenSearch: http://localhost:9200"
@ -102,13 +105,13 @@ infra-cpu:
# Container management # Container management
stop: stop:
@echo "🛑 Stopping all containers..." @echo "🛑 Stopping all containers..."
docker compose down docker compose $(OPENRAG_ENV_FILE) down
restart: stop dev restart: stop dev
clean: stop clean: stop
@echo "🧹 Cleaning up containers and volumes..." @echo "🧹 Cleaning up containers and volumes..."
docker compose down -v --remove-orphans docker compose $(OPENRAG_ENV_FILE) down -v --remove-orphans
docker system prune -f docker system prune -f
# Local development # Local development
@ -153,36 +156,36 @@ build-fe:
# Logging and debugging # Logging and debugging
logs: logs:
@echo "📋 Showing all container logs..." @echo "📋 Showing all container logs..."
docker compose logs -f docker compose $(OPENRAG_ENV_FILE) logs -f
logs-be: logs-be:
@echo "📋 Showing backend logs..." @echo "📋 Showing backend logs..."
docker compose logs -f openrag-backend docker compose $(OPENRAG_ENV_FILE) logs -f openrag-backend
logs-fe: logs-fe:
@echo "📋 Showing frontend logs..." @echo "📋 Showing frontend logs..."
docker compose logs -f openrag-frontend docker compose $(OPENRAG_ENV_FILE) logs -f openrag-frontend
logs-lf: logs-lf:
@echo "📋 Showing langflow logs..." @echo "📋 Showing langflow logs..."
docker compose logs -f langflow docker compose $(OPENRAG_ENV_FILE) logs -f langflow
logs-os: logs-os:
@echo "📋 Showing opensearch logs..." @echo "📋 Showing opensearch logs..."
docker compose logs -f opensearch docker compose $(OPENRAG_ENV_FILE) logs -f opensearch
# Shell access # Shell access
shell-be: shell-be:
@echo "🐚 Opening shell in backend container..." @echo "🐚 Opening shell in backend container..."
docker compose exec openrag-backend /bin/bash docker compose $(OPENRAG_ENV_FILE) exec openrag-backend /bin/bash
shell-lf: shell-lf:
@echo "🐚 Opening shell in langflow container..." @echo "🐚 Opening shell in langflow container..."
docker compose exec langflow /bin/bash docker compose $(OPENRAG_ENV_FILE) exec langflow /bin/bash
shell-os: shell-os:
@echo "🐚 Opening shell in opensearch container..." @echo "🐚 Opening shell in opensearch container..."
docker compose exec opensearch /bin/bash docker compose $(OPENRAG_ENV_FILE) exec opensearch /bin/bash
# Testing and quality # Testing and quality
test: test:
@ -414,7 +417,7 @@ lint:
# Service status # Service status
status: status:
@echo "📊 Container status:" @echo "📊 Container status:"
@docker compose ps 2>/dev/null || echo "No containers running" @docker compose $(OPENRAG_ENV_FILE) ps 2>/dev/null || echo "No containers running"
health: health:
@echo "🏥 Health check:" @echo "🏥 Health check:"

View file

@ -1 +1 @@
1. If you modified the built-in flows or created custom flows in your [OpenRAG Langflow instance](/agents), and you want to preserve those changes, then you must [export your flows](https://docs.langflow.org/concepts-flows-import) before starting this process. Afterwards, you can import your flows or reference the exported flow JSON as needed. 1. If you modified the built-in flows or created custom flows in your [OpenRAG Langflow instance](/agents), [export your flows](https://docs.langflow.org/concepts-flows-import) before starting this process. Although OpenRAG can preserve changes to the built-in flows, it doesn't preserve user-created flows. As a general best practice, exporting your flows is recommended to create backups of your customizations. Afterwards, you can reimport your flows or reference the exported flow JSON as needed.

View file

@ -157,13 +157,18 @@ You can [monitor ingestion](#monitor-ingestion) to see the progress of the uploa
## Ingest knowledge from URLs {#url-flow} ## Ingest knowledge from URLs {#url-flow}
When using the OpenRAG chat, you can enter URLs into the chat to be ingested in real time during your conversation.
:::tip
Use [percent-encoding (URL encoding)](https://www.w3schools.com/tags/ref_urlencode.ASP) for URLs that contain special characters other than the standard slash, period, and colon characters.
For example, use `https://en.wikipedia.org/wiki/Caf%C3%A9` instead of `https://en.wikipedia.org/wiki/Café` or `https://en.wikipedia.org/wiki/Coffee%5Fculture` instead of `https://en.wikipedia.org/wiki/Coffee_culture`.
:::
The **OpenSearch URL Ingestion** flow is used to ingest web content from URLs. The **OpenSearch URL Ingestion** flow is used to ingest web content from URLs.
This flow isn't directly accessible from the OpenRAG user interface. This flow isn't directly accessible from the OpenRAG user interface.
Instead, this flow is called by the [**OpenRAG OpenSearch Agent** flow](/chat#flow) as a Model Context Protocol (MCP) tool. Instead, this flow is called by the [**OpenRAG OpenSearch Agent** flow](/chat#flow) as a Model Context Protocol (MCP) tool.
The agent can call this component to fetch web content from a given URL, and then ingest that content into your OpenSearch knowledge base. The agent can call this component to fetch web content from a given URL, and then ingest that content into your OpenSearch knowledge base.
Like all OpenRAG flows, you can [inspect the flow in Langflow](/agents#inspect-and-modify-flows), and you can customize it. Like all OpenRAG flows, you can [inspect the flow in Langflow](/agents#inspect-and-modify-flows), and you can customize it.
For more information about MCP in Langflow, see the Langflow documentation on [MCP clients](https://docs.langflow.org/mcp-client) and [MCP servers](https://docs.langflow.org/mcp-tutorial). For more information about MCP in Langflow, see the Langflow documentation on [MCP clients](https://docs.langflow.org/mcp-client) and [MCP servers](https://docs.langflow.org/mcp-tutorial).
## Monitor ingestion ## Monitor ingestion

View file

@ -139,6 +139,8 @@ To reset your OpenRAG deployment _and_ delete all OpenRAG data, see [Reinstall O
3. Repeat the [setup process](/install#setup) to restart the services and launch the OpenRAG app. Your OpenRAG passwords, OAuth credentials (if previously set), and onboarding configuration are restored from the `.env` file. 3. Repeat the [setup process](/install#setup) to restart the services and launch the OpenRAG app. Your OpenRAG passwords, OAuth credentials (if previously set), and onboarding configuration are restored from the `.env` file.
4. If you exported customized flows, [import your flows](https://docs.langflow.org/concepts-flows-import) into Langflow after completing the onboarding process.
</TabItem> </TabItem>
<TabItem value="env" label="Self-managed services"> <TabItem value="env" label="Self-managed services">

View file

@ -53,7 +53,20 @@ export function KnowledgeFilterList({
}; };
const parseQueryData = (queryData: string): ParsedQueryData => { const parseQueryData = (queryData: string): ParsedQueryData => {
return JSON.parse(queryData) as ParsedQueryData; const parsed = JSON.parse(queryData);
// Provide defaults for missing fields to handle API-created filters
return {
query: parsed.query ?? "",
filters: {
data_sources: parsed.filters?.data_sources ?? ["*"],
document_types: parsed.filters?.document_types ?? ["*"],
owners: parsed.filters?.owners ?? ["*"],
},
limit: parsed.limit ?? 10,
scoreThreshold: parsed.scoreThreshold ?? 0,
color: parsed.color ?? "zinc",
icon: parsed.icon ?? "filter",
};
}; };
return ( return (

View file

@ -96,15 +96,16 @@ export function KnowledgeFilterPanel() {
setQuery(parsedFilterData.query || ""); setQuery(parsedFilterData.query || "");
// Set the actual filter selections from the saved knowledge filter // Set the actual filter selections from the saved knowledge filter
const filters = parsedFilterData.filters; const filters = parsedFilterData.filters || {};
// Use the exact selections from the saved filter // Use the exact selections from the saved filter
// Empty arrays mean "none selected" not "all selected" // Empty arrays mean "none selected" not "all selected"
// Provide defaults for missing fields to handle API-created filters
const processedFilters = { const processedFilters = {
data_sources: filters.data_sources, data_sources: filters.data_sources ?? ["*"],
document_types: filters.document_types, document_types: filters.document_types ?? ["*"],
owners: filters.owners, owners: filters.owners ?? ["*"],
connector_types: filters.connector_types || ["*"], connector_types: filters.connector_types ?? ["*"],
}; };
console.log("[DEBUG] Loading filter selections:", processedFilters); console.log("[DEBUG] Loading filter selections:", processedFilters);
@ -114,8 +115,8 @@ export function KnowledgeFilterPanel() {
setScoreThreshold(parsedFilterData.scoreThreshold || 0); setScoreThreshold(parsedFilterData.scoreThreshold || 0);
setName(selectedFilter.name); setName(selectedFilter.name);
setDescription(selectedFilter.description || ""); setDescription(selectedFilter.description || "");
setColor(parsedFilterData.color); setColor(parsedFilterData.color ?? "zinc");
setIconKey(parsedFilterData.icon); setIconKey(parsedFilterData.icon ?? "filter");
} }
}, [selectedFilter, parsedFilterData]); }, [selectedFilter, parsedFilterData]);
@ -123,13 +124,20 @@ export function KnowledgeFilterPanel() {
useEffect(() => { useEffect(() => {
if (createMode && parsedFilterData) { if (createMode && parsedFilterData) {
setQuery(parsedFilterData.query || ""); setQuery(parsedFilterData.query || "");
setSelectedFilters(parsedFilterData.filters); // Provide defaults for missing filter fields
const filters = parsedFilterData.filters || {};
setSelectedFilters({
data_sources: filters.data_sources ?? ["*"],
document_types: filters.document_types ?? ["*"],
owners: filters.owners ?? ["*"],
connector_types: filters.connector_types ?? ["*"],
});
setResultLimit(parsedFilterData.limit || 10); setResultLimit(parsedFilterData.limit || 10);
setScoreThreshold(parsedFilterData.scoreThreshold || 0); setScoreThreshold(parsedFilterData.scoreThreshold || 0);
setName(""); setName("");
setDescription(""); setDescription("");
setColor(parsedFilterData.color); setColor(parsedFilterData.color ?? "zinc");
setIconKey(parsedFilterData.icon); setIconKey(parsedFilterData.icon ?? "filter");
} }
}, [createMode, parsedFilterData]); }, [createMode, parsedFilterData]);

View file

@ -50,7 +50,10 @@ export function MultiSelect({
const [open, setOpen] = React.useState(false); const [open, setOpen] = React.useState(false);
const [searchValue, setSearchValue] = React.useState(""); const [searchValue, setSearchValue] = React.useState("");
const isAllSelected = value.includes("*"); // Normalize value to empty array if undefined/null to prevent crashes
const safeValue = value ?? [];
const isAllSelected = safeValue.includes("*");
const filteredOptions = options.filter((option) => const filteredOptions = options.filter((option) =>
option.label.toLowerCase().includes(searchValue.toLowerCase()), option.label.toLowerCase().includes(searchValue.toLowerCase()),
@ -66,12 +69,12 @@ export function MultiSelect({
} }
} else { } else {
let newValue: string[]; let newValue: string[];
if (value.includes(optionValue)) { if (safeValue.includes(optionValue)) {
// Remove the item // Remove the item
newValue = value.filter((v) => v !== optionValue && v !== "*"); newValue = safeValue.filter((v) => v !== optionValue && v !== "*");
} else { } else {
// Add the item and remove "All" if present // Add the item and remove "All" if present
newValue = [...value.filter((v) => v !== "*"), optionValue]; newValue = [...safeValue.filter((v) => v !== "*"), optionValue];
// Check max selection limit // Check max selection limit
if (maxSelection && newValue.length > maxSelection) { if (maxSelection && newValue.length > maxSelection) {
@ -87,7 +90,7 @@ export function MultiSelect({
return allOptionLabel; return allOptionLabel;
} }
if (value.length === 0) { if (safeValue.length === 0) {
return placeholder; return placeholder;
} }
@ -96,7 +99,7 @@ export function MultiSelect({
.toLowerCase() .toLowerCase()
.replace("select ", "") .replace("select ", "")
.replace("...", ""); .replace("...", "");
return `${value.length} ${noun}`; return `${safeValue.length} ${noun}`;
}; };
return ( return (
@ -152,7 +155,7 @@ export function MultiSelect({
<Check <Check
className={cn( className={cn(
"mr-2 h-4 w-4", "mr-2 h-4 w-4",
value.includes(option.value) safeValue.includes(option.value)
? "opacity-100" ? "opacity-100"
: "opacity-0", : "opacity-0",
)} )}

View file

@ -84,7 +84,22 @@ export function KnowledgeFilterProvider({
if (filter) { if (filter) {
setCreateMode(false); setCreateMode(false);
try { try {
const parsed = JSON.parse(filter.query_data) as ParsedQueryData; const raw = JSON.parse(filter.query_data);
// Normalize parsed data with defaults for missing fields
// This handles filters created via API with incomplete queryData
const parsed: ParsedQueryData = {
query: raw.query ?? "",
filters: {
data_sources: raw.filters?.data_sources ?? ["*"],
document_types: raw.filters?.document_types ?? ["*"],
owners: raw.filters?.owners ?? ["*"],
connector_types: raw.filters?.connector_types ?? ["*"],
},
limit: raw.limit ?? 10,
scoreThreshold: raw.scoreThreshold ?? 0,
color: raw.color ?? "zinc",
icon: raw.icon ?? "filter",
};
setParsedFilterData(parsed); setParsedFilterData(parsed);
// Auto-open panel when filter is selected // Auto-open panel when filter is selected

View file

@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project] [project]
name = "openrag-sdk" name = "openrag-sdk"
version = "0.1.1" version = "0.1.2"
description = "Official Python SDK for OpenRAG API" description = "Official Python SDK for OpenRAG API"
readme = "README.md" readme = "README.md"
license = "MIT" license = "MIT"
@ -59,3 +59,4 @@ select = ["E", "F", "I", "UP"]
[tool.mypy] [tool.mypy]
python_version = "3.10" python_version = "3.10"
strict = true strict = true

View file

@ -1,6 +1,6 @@
{ {
"name": "openrag-sdk", "name": "openrag-sdk",
"version": "0.1.1", "version": "0.1.2",
"description": "Official TypeScript/JavaScript SDK for OpenRAG API", "description": "Official TypeScript/JavaScript SDK for OpenRAG API",
"main": "./dist/index.js", "main": "./dist/index.js",
"module": "./dist/index.mjs", "module": "./dist/index.mjs",
@ -56,3 +56,4 @@
"node": ">=18.0.0" "node": ">=18.0.0"
} }
} }

View file

@ -8,6 +8,42 @@ from utils.logging_config import get_logger
logger = get_logger(__name__) logger = get_logger(__name__)
def normalize_query_data(query_data: str | dict) -> str:
"""
Normalize query_data to ensure all required fields exist with defaults.
This prevents frontend crashes when API-created filters have incomplete data.
"""
# Parse if string
if isinstance(query_data, str):
try:
data = json.loads(query_data)
except json.JSONDecodeError:
data = {}
else:
data = query_data or {}
# Ensure filters object exists with all required fields
filters = data.get("filters") or {}
normalized_filters = {
"data_sources": filters.get("data_sources", ["*"]),
"document_types": filters.get("document_types", ["*"]),
"owners": filters.get("owners", ["*"]),
"connector_types": filters.get("connector_types", ["*"]),
}
# Build normalized query_data with defaults
normalized = {
"query": data.get("query", ""),
"filters": normalized_filters,
"limit": data.get("limit", 10),
"scoreThreshold": data.get("scoreThreshold", 0),
"color": data.get("color", "zinc"),
"icon": data.get("icon", "filter"),
}
return json.dumps(normalized)
async def create_knowledge_filter( async def create_knowledge_filter(
request: Request, knowledge_filter_service, session_manager request: Request, knowledge_filter_service, session_manager
): ):
@ -25,6 +61,15 @@ async def create_knowledge_filter(
if not query_data: if not query_data:
return JSONResponse({"error": "Query data is required"}, status_code=400) return JSONResponse({"error": "Query data is required"}, status_code=400)
# Normalize query_data to ensure all required fields exist
try:
normalized_query_data = normalize_query_data(query_data)
except Exception as e:
logger.error(f"Failed to normalize query_data: {e}")
return JSONResponse(
{"error": f"Invalid queryData format: {str(e)}"}, status_code=400
)
user = request.state.user user = request.state.user
jwt_token = session_manager.get_effective_jwt_token(user.user_id, request.state.jwt_token) jwt_token = session_manager.get_effective_jwt_token(user.user_id, request.state.jwt_token)
@ -34,7 +79,7 @@ async def create_knowledge_filter(
"id": filter_id, "id": filter_id,
"name": name, "name": name,
"description": description, "description": description,
"query_data": query_data, # Store the full search query JSON "query_data": normalized_query_data, # Store normalized query JSON with defaults
"owner": user.user_id, "owner": user.user_id,
"allowed_users": payload.get("allowedUsers", []), # ACL field for future use "allowed_users": payload.get("allowedUsers", []), # ACL field for future use
"allowed_groups": payload.get("allowedGroups", []), # ACL field for future use "allowed_groups": payload.get("allowedGroups", []), # ACL field for future use
@ -158,12 +203,22 @@ async def update_knowledge_filter(
{"error": "Failed to delete existing knowledge filter"}, status_code=500 {"error": "Failed to delete existing knowledge filter"}, status_code=500
) )
# Normalize query_data if provided, otherwise use existing
query_data = payload.get("queryData", existing_filter["query_data"])
try:
normalized_query_data = normalize_query_data(query_data)
except Exception as e:
logger.error(f"Failed to normalize query_data: {e}")
return JSONResponse(
{"error": f"Invalid queryData format: {str(e)}"}, status_code=400
)
# Create updated knowledge filter document with same ID # Create updated knowledge filter document with same ID
updated_filter = { updated_filter = {
"id": filter_id, "id": filter_id,
"name": payload.get("name", existing_filter["name"]), "name": payload.get("name", existing_filter["name"]),
"description": payload.get("description", existing_filter["description"]), "description": payload.get("description", existing_filter["description"]),
"query_data": payload.get("queryData", existing_filter["query_data"]), "query_data": normalized_query_data,
"owner": existing_filter["owner"], "owner": existing_filter["owner"],
"allowed_users": payload.get( "allowed_users": payload.get(
"allowedUsers", existing_filter.get("allowed_users", []) "allowedUsers", existing_filter.get("allowed_users", [])