[Frontend hunk for ChunksPageContent (@@ -276,54 +270,55 @@): the JSX markup was lost in extraction. Recoverable intent: the right panel renders only when chunks.length > 0 and contains a "Technical details" card (Total chunks from chunks.length; Avg length from averageChunkLength.toFixed(0) in chars; a "Process time" row commented out with a TODO until the data is available) plus an "Original document" card (Type via getFileTypeLabel(fileData.mimetype), Size as Math.round(fileData.size / 1024) KB, each with an "Unknown" fallback; the Name ({fileData?.filename}), Uploaded, Source, and Updated rows commented out as TODOs showing "N/A").]
diff --git a/pyproject.toml b/pyproject.toml
index ee0143f8..00f8409f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,10 @@
+[build-system]
+requires = ["setuptools>=61.0", "wheel"]
+build-backend = "setuptools.build_meta"
+
[project]
name = "openrag"
-version = "0.1.15"
+version = "0.1.19"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.13"
@@ -31,6 +35,9 @@ dependencies = [
"docling-serve>=1.4.1",
]
+[dependency-groups]
+dev = ["pytest>=8", "pytest-asyncio>=0.21.0", "pytest-mock>=3.12.0", "pytest-cov>=4.0.0"]
+
[project.scripts]
openrag = "tui.main:run_tui"
diff --git a/scripts/docling_ctl.py b/scripts/docling_ctl.py
new file mode 100644
index 00000000..8dc5c879
--- /dev/null
+++ b/scripts/docling_ctl.py
@@ -0,0 +1,91 @@
+#!/usr/bin/env python3
+"""Helper script to control docling-serve using DoclingManager for CI/testing."""
+
+import sys
+import asyncio
+import argparse
+from pathlib import Path
+
+# Add src to path so we can import DoclingManager
+sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
+
+from tui.managers.docling_manager import DoclingManager
+
+
+async def start_docling(port: int = 5001, host: str | None = None, enable_ui: bool = False):
+ """Start docling-serve."""
+ manager = DoclingManager()
+
+ if manager.is_running():
+ print(f"Docling-serve is already running")
+ status = manager.get_status()
+ print(f"Endpoint: {status['endpoint']}")
+ return 0
+
+ host_msg = f"{host}:{port}" if host else f"auto-detected host:{port}"
+ print(f"Starting docling-serve on {host_msg}...")
+ success, message = await manager.start(port=port, host=host, enable_ui=enable_ui)
+
+ if success:
+ print(message)
+ status = manager.get_status()
+ print(f"Endpoint: {status['endpoint']}")
+ print(f"PID: {status['pid']}")
+ return 0
+ else:
+ print(message, file=sys.stderr)
+ return 1
+
+
+async def stop_docling():
+ """Stop docling-serve."""
+ manager = DoclingManager()
+
+ if not manager.is_running():
+ print("Docling-serve is not running")
+ return 0
+
+ print("Stopping docling-serve...")
+ success, message = await manager.stop()
+
+ if success:
+ print(message)
+ return 0
+ else:
+ print(message, file=sys.stderr)
+ return 1
+
+
+async def status_docling():
+ """Get docling-serve status."""
+ manager = DoclingManager()
+ status = manager.get_status()
+
+ print(f"Status: {status['status']}")
+ if status['status'] == 'running':
+ print(f"Endpoint: {status['endpoint']}")
+ print(f"Docs: {status['docs_url']}")
+ print(f"PID: {status['pid']}")
+
+ return 0 if status['status'] == 'running' else 1
+
+
+async def main():
+ parser = argparse.ArgumentParser(description="Control docling-serve for CI/testing")
+ parser.add_argument("command", choices=["start", "stop", "status"], help="Command to run")
+ parser.add_argument("--port", type=int, default=5001, help="Port to run on (default: 5001)")
+ parser.add_argument("--host", default=None, help="Host to bind to (default: auto-detect for containers)")
+ parser.add_argument("--enable-ui", action="store_true", help="Enable UI")
+
+ args = parser.parse_args()
+
+ if args.command == "start":
+ return await start_docling(port=args.port, host=args.host, enable_ui=args.enable_ui)
+ elif args.command == "stop":
+ return await stop_docling()
+ elif args.command == "status":
+ return await status_docling()
+
+
+if __name__ == "__main__":
+ sys.exit(asyncio.run(main()))
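
For CI recipes that prefer importing the manager over shelling out to this script, here is a minimal sketch assuming the same `DoclingManager` API the script uses (`is_running()`, `start(port, host, enable_ui)`, `stop()`, `get_status()`), and assuming `src` is already on `sys.path` as the script arranges:

```python
import asyncio
from tui.managers.docling_manager import DoclingManager  # requires src/ on sys.path

async def restart_docling(port: int = 5001) -> int:
    """Stop any running docling-serve, then start a fresh one."""
    manager = DoclingManager()
    if manager.is_running():
        await manager.stop()
    ok, message = await manager.start(port=port, host=None, enable_ui=False)
    print(message)
    return 0 if ok else 1

# Roughly equivalent to: docling_ctl.py stop && docling_ctl.py start
raise SystemExit(asyncio.run(restart_docling()))
```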
diff --git a/src/auth_middleware.py b/src/auth_middleware.py
index 44d1b2f0..1bc6cf04 100644
--- a/src/auth_middleware.py
+++ b/src/auth_middleware.py
@@ -28,7 +28,6 @@ def require_auth(session_manager):
async def wrapper(request: Request):
# In no-auth mode, bypass authentication entirely
if is_no_auth_mode():
- logger.debug("No-auth mode: Creating anonymous user")
# Create an anonymous user object so endpoints don't break
from session_manager import User
from datetime import datetime
@@ -36,7 +35,6 @@ def require_auth(session_manager):
from session_manager import AnonymousUser
request.state.user = AnonymousUser()
request.state.jwt_token = None # No JWT in no-auth mode
- logger.debug("Set user_id=anonymous, jwt_token=None")
return await handler(request)
user = get_current_user(request, session_manager)
diff --git a/src/config/settings.py b/src/config/settings.py
index 6f55520d..598ccfb2 100644
--- a/src/config/settings.py
+++ b/src/config/settings.py
@@ -13,8 +13,8 @@ from utils.container_utils import get_container_host
from utils.document_processing import create_document_converter
from utils.logging_config import get_logger
-load_dotenv()
-load_dotenv("../")
+load_dotenv(override=False)
+load_dotenv("../", override=False)
logger = get_logger(__name__)
@@ -61,12 +61,6 @@ DISABLE_INGEST_WITH_LANGFLOW = os.getenv(
def is_no_auth_mode():
"""Check if we're running in no-auth mode (OAuth credentials missing)"""
result = not (GOOGLE_OAUTH_CLIENT_ID and GOOGLE_OAUTH_CLIENT_SECRET)
- logger.debug(
- "Checking auth mode",
- no_auth_mode=result,
- has_client_id=GOOGLE_OAUTH_CLIENT_ID is not None,
- has_client_secret=GOOGLE_OAUTH_CLIENT_SECRET is not None,
- )
return result
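
The `override=False` switch matters for the test harness later in this diff: variables already exported (by CI, or by `conftest.py` before imports) now win over `.env` contents. A standalone sketch of the semantics:

```python
import os
from dotenv import load_dotenv

os.environ["OPENSEARCH_PASSWORD"] = "from-ci"

# override=False: the existing environment wins; .env only fills in
# variables that are not already set.
load_dotenv(override=False)
assert os.environ["OPENSEARCH_PASSWORD"] == "from-ci"

# load_dotenv(override=True) would clobber the CI value with .env's value.
```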
diff --git a/src/main.py b/src/main.py
index a09d2488..1094f8b5 100644
--- a/src/main.py
+++ b/src/main.py
@@ -131,7 +131,7 @@ async def configure_alerting_security():
# Don't fail startup if alerting config fails
-async def _ensure_opensearch_index(self):
+async def _ensure_opensearch_index():
"""Ensure OpenSearch index exists when using traditional connector service."""
try:
# Check if index already exists
@@ -242,6 +242,9 @@ def generate_jwt_keys():
capture_output=True,
)
+ # Set restrictive permissions on private key (readable by owner only)
+ os.chmod(private_key_path, 0o600)
+
# Generate public key
subprocess.run(
[
@@ -257,12 +260,21 @@ def generate_jwt_keys():
capture_output=True,
)
+ # Set permissions on public key (readable by all)
+ os.chmod(public_key_path, 0o644)
+
logger.info("Generated RSA keys for JWT signing")
except subprocess.CalledProcessError as e:
logger.error("Failed to generate RSA keys", error=str(e))
raise
else:
- logger.info("RSA keys already exist, skipping generation")
+ # Ensure correct permissions on existing keys
+ try:
+ os.chmod(private_key_path, 0o600)
+ os.chmod(public_key_path, 0o644)
+ logger.info("RSA keys already exist, ensured correct permissions")
+ except OSError as e:
+ logger.warning("Failed to set permissions on existing keys", error=str(e))
async def init_index_when_ready():
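
A quick way to verify the modes land as intended after startup; the key paths below are hypothetical stand-ins, since `generate_jwt_keys()` resolves its own `private_key_path`/`public_key_path`:

```python
import os
import stat

# Hypothetical paths for illustration only.
for path, expected in (("keys/private_key.pem", 0o600),
                       ("keys/public_key.pem", 0o644)):
    mode = stat.S_IMODE(os.stat(path).st_mode)  # permission bits only
    assert mode == expected, f"{path}: got {oct(mode)}, want {oct(expected)}"
```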
diff --git a/src/services/document_service.py b/src/services/document_service.py
index 5204ea0e..d596fb25 100644
--- a/src/services/document_service.py
+++ b/src/services/document_service.py
@@ -126,7 +126,11 @@ class DocumentService:
from utils.file_utils import auto_cleanup_tempfile
import os
- with auto_cleanup_tempfile() as tmp_path:
+ # Preserve file extension for docling format detection
+ filename = upload_file.filename or "uploaded"
+ suffix = os.path.splitext(filename)[1] or ""
+
+ with auto_cleanup_tempfile(suffix=suffix) as tmp_path:
# Stream upload file to temporary file
file_size = 0
with open(tmp_path, 'wb') as tmp_file:
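
The extension-preserving temp file matters because docling keys format detection off the path suffix. A stdlib-only sketch of the same idea, assuming `auto_cleanup_tempfile` wraps something like `NamedTemporaryFile`:

```python
import os
import tempfile

filename = "quarterly-report.docx"            # e.g. upload_file.filename
suffix = os.path.splitext(filename)[1] or ""  # ".docx"

with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
    tmp_path = tmp.name                       # ends in .docx, not a bare name
try:
    print(tmp_path)                           # converter can infer the format
finally:
    os.unlink(tmp_path)
```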
diff --git a/src/tui/__init__.py b/src/tui/__init__.py
index ab225908..0437803a 100644
--- a/src/tui/__init__.py
+++ b/src/tui/__init__.py
@@ -1 +1,8 @@
"""OpenRAG Terminal User Interface package."""
+
+from importlib.metadata import version
+
+try:
+ __version__ = version("openrag")
+except Exception:
+ __version__ = "unknown"
diff --git a/src/tui/_assets/docker-compose-cpu.yml b/src/tui/_assets/docker-compose-cpu.yml
deleted file mode 100644
index 4a1125f8..00000000
--- a/src/tui/_assets/docker-compose-cpu.yml
+++ /dev/null
@@ -1,122 +0,0 @@
-services:
- opensearch:
- image: phact/openrag-opensearch:${OPENRAG_VERSION:-latest}
- #build:
- # context: .
- # dockerfile: Dockerfile
- container_name: os
- depends_on:
- - openrag-backend
- environment:
- - discovery.type=single-node
- - OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_PASSWORD}
- # Run security setup in background after OpenSearch starts
- command: >
- bash -c "
- # Start OpenSearch in background
- /usr/share/opensearch/opensearch-docker-entrypoint.sh opensearch &
-
- # Wait a bit for OpenSearch to start, then apply security config
- sleep 10 && /usr/share/opensearch/setup-security.sh &
-
- # Wait for background processes
- wait
- "
- ports:
- - "9200:9200"
- - "9600:9600"
-
- dashboards:
- image: opensearchproject/opensearch-dashboards:3.0.0
- container_name: osdash
- depends_on:
- - opensearch
- environment:
- OPENSEARCH_HOSTS: '["https://opensearch:9200"]'
- OPENSEARCH_USERNAME: "admin"
- OPENSEARCH_PASSWORD: ${OPENSEARCH_PASSWORD}
- ports:
- - "5601:5601"
-
- openrag-backend:
- image: phact/openrag-backend:${OPENRAG_VERSION:-latest}
- #build:
- #context: .
- #dockerfile: Dockerfile.backend
- container_name: openrag-backend
- depends_on:
- - langflow
- environment:
- - OPENSEARCH_HOST=opensearch
- - LANGFLOW_URL=http://langflow:7860
- - LANGFLOW_PUBLIC_URL=${LANGFLOW_PUBLIC_URL}
- - LANGFLOW_SECRET_KEY=${LANGFLOW_SECRET_KEY}
- - LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER}
- - LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD}
- - LANGFLOW_CHAT_FLOW_ID=${LANGFLOW_CHAT_FLOW_ID}
- - LANGFLOW_INGEST_FLOW_ID=${LANGFLOW_INGEST_FLOW_ID}
- - LANGFLOW_URL_INGEST_FLOW_ID=${LANGFLOW_URL_INGEST_FLOW_ID}
- - DISABLE_INGEST_WITH_LANGFLOW=${DISABLE_INGEST_WITH_LANGFLOW:-false}
- - NUDGES_FLOW_ID=${NUDGES_FLOW_ID}
- - OPENSEARCH_PORT=9200
- - OPENSEARCH_USERNAME=admin
- - OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
- - OPENAI_API_KEY=${OPENAI_API_KEY}
- - NVIDIA_DRIVER_CAPABILITIES=compute,utility
- - NVIDIA_VISIBLE_DEVICES=all
- - GOOGLE_OAUTH_CLIENT_ID=${GOOGLE_OAUTH_CLIENT_ID}
- - GOOGLE_OAUTH_CLIENT_SECRET=${GOOGLE_OAUTH_CLIENT_SECRET}
- - MICROSOFT_GRAPH_OAUTH_CLIENT_ID=${MICROSOFT_GRAPH_OAUTH_CLIENT_ID}
- - MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET=${MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET}
- - WEBHOOK_BASE_URL=${WEBHOOK_BASE_URL}
- - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
- - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
- volumes:
- - ./documents:/app/documents:Z
- - ./keys:/app/keys:Z
- - ./flows:/app/flows:Z
-
- openrag-frontend:
- image: phact/openrag-frontend:${OPENRAG_VERSION:-latest}
- #build:
- #context: .
- #dockerfile: Dockerfile.frontend
- container_name: openrag-frontend
- depends_on:
- - openrag-backend
- environment:
- - OPENRAG_BACKEND_HOST=openrag-backend
- ports:
- - "3000:3000"
-
- langflow:
- volumes:
- - ./flows:/app/flows:Z
- image: phact/openrag-langflow:${LANGFLOW_VERSION:-latest}
- container_name: langflow
- ports:
- - "7860:7860"
- environment:
- - OPENAI_API_KEY=${OPENAI_API_KEY}
- - LANGFLOW_LOAD_FLOWS_PATH=/app/flows
- - LANGFLOW_SECRET_KEY=${LANGFLOW_SECRET_KEY}
- - JWT=None
- - OWNER=None
- - OWNER_NAME=None
- - OWNER_EMAIL=None
- - CONNECTOR_TYPE=system
- - CONNECTOR_TYPE_URL=url
- - OPENRAG-QUERY-FILTER="{}"
- - OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
- - FILENAME=None
- - MIMETYPE=None
- - FILESIZE=0
- - LANGFLOW_VARIABLES_TO_GET_FROM_ENVIRONMENT=JWT,OPENRAG-QUERY-FILTER,OPENSEARCH_PASSWORD,OWNER,OWNER_NAME,OWNER_EMAIL,CONNECTOR_TYPE,FILENAME,MIMETYPE,FILESIZE
- - LANGFLOW_LOG_LEVEL=DEBUG
- - LANGFLOW_AUTO_LOGIN=${LANGFLOW_AUTO_LOGIN}
- - LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER}
- - LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD}
- - LANGFLOW_NEW_USER_IS_ACTIVE=${LANGFLOW_NEW_USER_IS_ACTIVE}
- - LANGFLOW_ENABLE_SUPERUSER_CLI=${LANGFLOW_ENABLE_SUPERUSER_CLI}
- # - DEFAULT_FOLDER_NAME=OpenRAG
- - HIDE_GETTING_STARTED_PROGRESS=true
diff --git a/src/tui/_assets/docker-compose-cpu.yml b/src/tui/_assets/docker-compose-cpu.yml
new file mode 120000
index 00000000..5ad7a663
--- /dev/null
+++ b/src/tui/_assets/docker-compose-cpu.yml
@@ -0,0 +1 @@
+../../../docker-compose-cpu.yml
\ No newline at end of file
diff --git a/src/tui/_assets/docker-compose.yml b/src/tui/_assets/docker-compose.yml
deleted file mode 100644
index 6cac6506..00000000
--- a/src/tui/_assets/docker-compose.yml
+++ /dev/null
@@ -1,122 +0,0 @@
-services:
- opensearch:
- image: phact/openrag-opensearch:${OPENRAG_VERSION:-latest}
- #build:
- #context: .
- #dockerfile: Dockerfile
- container_name: os
- depends_on:
- - openrag-backend
- environment:
- - discovery.type=single-node
- - OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_PASSWORD}
- # Run security setup in background after OpenSearch starts
- command: >
- bash -c "
- # Start OpenSearch in background
- /usr/share/opensearch/opensearch-docker-entrypoint.sh opensearch &
-
- # Wait a bit for OpenSearch to start, then apply security config
- sleep 10 && /usr/share/opensearch/setup-security.sh &
-
- # Wait for background processes
- wait
- "
- ports:
- - "9200:9200"
- - "9600:9600"
-
- dashboards:
- image: opensearchproject/opensearch-dashboards:3.0.0
- container_name: osdash
- depends_on:
- - opensearch
- environment:
- OPENSEARCH_HOSTS: '["https://opensearch:9200"]'
- OPENSEARCH_USERNAME: "admin"
- OPENSEARCH_PASSWORD: ${OPENSEARCH_PASSWORD}
- ports:
- - "5601:5601"
-
- openrag-backend:
- image: phact/openrag-backend:${OPENRAG_VERSION:-latest}
- #build:
- #context: .
- #dockerfile: Dockerfile.backend
- container_name: openrag-backend
- depends_on:
- - langflow
- environment:
- - OPENSEARCH_HOST=opensearch
- - LANGFLOW_URL=http://langflow:7860
- - LANGFLOW_PUBLIC_URL=${LANGFLOW_PUBLIC_URL}
- - LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER}
- - LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD}
- - LANGFLOW_CHAT_FLOW_ID=${LANGFLOW_CHAT_FLOW_ID}
- - LANGFLOW_INGEST_FLOW_ID=${LANGFLOW_INGEST_FLOW_ID}
- - LANGFLOW_URL_INGEST_FLOW_ID=${LANGFLOW_URL_INGEST_FLOW_ID}
- - DISABLE_INGEST_WITH_LANGFLOW=${DISABLE_INGEST_WITH_LANGFLOW:-false}
- - NUDGES_FLOW_ID=${NUDGES_FLOW_ID}
- - OPENSEARCH_PORT=9200
- - OPENSEARCH_USERNAME=admin
- - OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
- - OPENAI_API_KEY=${OPENAI_API_KEY}
- - NVIDIA_DRIVER_CAPABILITIES=compute,utility
- - NVIDIA_VISIBLE_DEVICES=all
- - GOOGLE_OAUTH_CLIENT_ID=${GOOGLE_OAUTH_CLIENT_ID}
- - GOOGLE_OAUTH_CLIENT_SECRET=${GOOGLE_OAUTH_CLIENT_SECRET}
- - MICROSOFT_GRAPH_OAUTH_CLIENT_ID=${MICROSOFT_GRAPH_OAUTH_CLIENT_ID}
- - MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET=${MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET}
- - WEBHOOK_BASE_URL=${WEBHOOK_BASE_URL}
- - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
- - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
- volumes:
- - ./documents:/app/documents:Z
- - ./keys:/app/keys:Z
- - ./flows:/app/flows:Z
- gpus: all
-
- openrag-frontend:
- image: phact/openrag-frontend:${OPENRAG_VERSION:-latest}
- #build:
- #context: .
- #dockerfile: Dockerfile.frontend
- container_name: openrag-frontend
- depends_on:
- - openrag-backend
- environment:
- - OPENRAG_BACKEND_HOST=openrag-backend
- ports:
- - "3000:3000"
-
- langflow:
- volumes:
- - ./flows:/app/flows:Z
- image: phact/openrag-langflow:${LANGFLOW_VERSION:-latest}
- container_name: langflow
- ports:
- - "7860:7860"
- environment:
- - OPENAI_API_KEY=${OPENAI_API_KEY}
- - LANGFLOW_LOAD_FLOWS_PATH=/app/flows
- - LANGFLOW_SECRET_KEY=${LANGFLOW_SECRET_KEY}
- - JWT=None
- - OWNER=None
- - OWNER_NAME=None
- - OWNER_EMAIL=None
- - CONNECTOR_TYPE=system
- - CONNECTOR_TYPE_URL=url
- - OPENRAG-QUERY-FILTER="{}"
- - OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
- - FILENAME=None
- - MIMETYPE=None
- - FILESIZE=0
- - LANGFLOW_VARIABLES_TO_GET_FROM_ENVIRONMENT=JWT,OPENRAG-QUERY-FILTER,OPENSEARCH_PASSWORD,OWNER,OWNER_NAME,OWNER_EMAIL,CONNECTOR_TYPE,FILENAME,MIMETYPE,FILESIZE
- - LANGFLOW_LOG_LEVEL=DEBUG
- - LANGFLOW_AUTO_LOGIN=${LANGFLOW_AUTO_LOGIN}
- - LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER}
- - LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD}
- - LANGFLOW_NEW_USER_IS_ACTIVE=${LANGFLOW_NEW_USER_IS_ACTIVE}
- - LANGFLOW_ENABLE_SUPERUSER_CLI=${LANGFLOW_ENABLE_SUPERUSER_CLI}
- # - DEFAULT_FOLDER_NAME="OpenRAG"
- - HIDE_GETTING_STARTED_PROGRESS=true
diff --git a/src/tui/_assets/docker-compose.yml b/src/tui/_assets/docker-compose.yml
new file mode 120000
index 00000000..5abefb89
--- /dev/null
+++ b/src/tui/_assets/docker-compose.yml
@@ -0,0 +1 @@
+../../../docker-compose.yml
\ No newline at end of file
diff --git a/src/tui/_assets/documents/2506.08231v1.pdf b/src/tui/_assets/documents/2506.08231v1.pdf
deleted file mode 100644
index 61e83265..00000000
Binary files a/src/tui/_assets/documents/2506.08231v1.pdf and /dev/null differ
diff --git a/src/tui/_assets/documents/2506.08231v1.pdf b/src/tui/_assets/documents/2506.08231v1.pdf
new file mode 120000
index 00000000..079e1ace
--- /dev/null
+++ b/src/tui/_assets/documents/2506.08231v1.pdf
@@ -0,0 +1 @@
+../../../../documents/2506.08231v1.pdf
\ No newline at end of file
diff --git a/src/tui/_assets/documents/ai-human-resources.pdf b/src/tui/_assets/documents/ai-human-resources.pdf
deleted file mode 100644
index 5e36eab4..00000000
Binary files a/src/tui/_assets/documents/ai-human-resources.pdf and /dev/null differ
diff --git a/src/tui/_assets/documents/ai-human-resources.pdf b/src/tui/_assets/documents/ai-human-resources.pdf
new file mode 120000
index 00000000..ba76acc5
--- /dev/null
+++ b/src/tui/_assets/documents/ai-human-resources.pdf
@@ -0,0 +1 @@
+../../../../documents/ai-human-resources.pdf
\ No newline at end of file
diff --git a/src/tui/_assets/documents/warmup_ocr.pdf b/src/tui/_assets/documents/warmup_ocr.pdf
deleted file mode 100644
index 8b17f8b2..00000000
Binary files a/src/tui/_assets/documents/warmup_ocr.pdf and /dev/null differ
diff --git a/src/tui/_assets/documents/warmup_ocr.pdf b/src/tui/_assets/documents/warmup_ocr.pdf
new file mode 120000
index 00000000..10a7670a
--- /dev/null
+++ b/src/tui/_assets/documents/warmup_ocr.pdf
@@ -0,0 +1 @@
+../../../../documents/warmup_ocr.pdf
\ No newline at end of file
diff --git a/src/tui/_assets/flows/components/ollama_embedding.json b/src/tui/_assets/flows/components/ollama_embedding.json
new file mode 120000
index 00000000..0e3a7516
--- /dev/null
+++ b/src/tui/_assets/flows/components/ollama_embedding.json
@@ -0,0 +1 @@
+../../../../../flows/components/ollama_embedding.json
\ No newline at end of file
diff --git a/src/tui/_assets/flows/components/ollama_llm.json b/src/tui/_assets/flows/components/ollama_llm.json
new file mode 120000
index 00000000..30c18f43
--- /dev/null
+++ b/src/tui/_assets/flows/components/ollama_llm.json
@@ -0,0 +1 @@
+../../../../../flows/components/ollama_llm.json
\ No newline at end of file
diff --git a/src/tui/_assets/flows/components/ollama_llm_text.json b/src/tui/_assets/flows/components/ollama_llm_text.json
new file mode 120000
index 00000000..1b55fd42
--- /dev/null
+++ b/src/tui/_assets/flows/components/ollama_llm_text.json
@@ -0,0 +1 @@
+../../../../../flows/components/ollama_llm_text.json
\ No newline at end of file
diff --git a/src/tui/_assets/flows/components/watsonx_embedding.json b/src/tui/_assets/flows/components/watsonx_embedding.json
new file mode 120000
index 00000000..3d349dac
--- /dev/null
+++ b/src/tui/_assets/flows/components/watsonx_embedding.json
@@ -0,0 +1 @@
+../../../../../flows/components/watsonx_embedding.json
\ No newline at end of file
diff --git a/src/tui/_assets/flows/components/watsonx_llm.json b/src/tui/_assets/flows/components/watsonx_llm.json
new file mode 120000
index 00000000..d19d7004
--- /dev/null
+++ b/src/tui/_assets/flows/components/watsonx_llm.json
@@ -0,0 +1 @@
+../../../../../flows/components/watsonx_llm.json
\ No newline at end of file
diff --git a/src/tui/_assets/flows/components/watsonx_llm_text.json b/src/tui/_assets/flows/components/watsonx_llm_text.json
new file mode 120000
index 00000000..8f760b2d
--- /dev/null
+++ b/src/tui/_assets/flows/components/watsonx_llm_text.json
@@ -0,0 +1 @@
+../../../../../flows/components/watsonx_llm_text.json
\ No newline at end of file
diff --git a/src/tui/_assets/flows/ingestion_flow.json b/src/tui/_assets/flows/ingestion_flow.json
new file mode 120000
index 00000000..6a00e536
--- /dev/null
+++ b/src/tui/_assets/flows/ingestion_flow.json
@@ -0,0 +1 @@
+../../../../flows/ingestion_flow.json
\ No newline at end of file
diff --git a/src/tui/_assets/flows/openrag_agent.json b/src/tui/_assets/flows/openrag_agent.json
new file mode 120000
index 00000000..fab81ca0
--- /dev/null
+++ b/src/tui/_assets/flows/openrag_agent.json
@@ -0,0 +1 @@
+../../../../flows/openrag_agent.json
\ No newline at end of file
diff --git a/src/tui/_assets/flows/openrag_ingest_docling.json b/src/tui/_assets/flows/openrag_ingest_docling.json
new file mode 120000
index 00000000..a23a93dc
--- /dev/null
+++ b/src/tui/_assets/flows/openrag_ingest_docling.json
@@ -0,0 +1 @@
+../../../../flows/openrag_ingest_docling.json
\ No newline at end of file
diff --git a/src/tui/_assets/flows/openrag_nudges.json b/src/tui/_assets/flows/openrag_nudges.json
new file mode 120000
index 00000000..b343ba5d
--- /dev/null
+++ b/src/tui/_assets/flows/openrag_nudges.json
@@ -0,0 +1 @@
+../../../../flows/openrag_nudges.json
\ No newline at end of file
diff --git a/src/tui/_assets/flows/openrag_url_mcp.json b/src/tui/_assets/flows/openrag_url_mcp.json
new file mode 120000
index 00000000..afb2d00e
--- /dev/null
+++ b/src/tui/_assets/flows/openrag_url_mcp.json
@@ -0,0 +1 @@
+../../../../flows/openrag_url_mcp.json
\ No newline at end of file
diff --git a/src/tui/main.py b/src/tui/main.py
index b68293fe..beee4497 100644
--- a/src/tui/main.py
+++ b/src/tui/main.py
@@ -2,6 +2,7 @@
import sys
from pathlib import Path
+from typing import Iterable, Optional
from textual.app import App, ComposeResult
from utils.logging_config import get_logger
try:
@@ -305,41 +306,103 @@ class OpenRAGTUI(App):
return True, "Runtime requirements satisfied"
-def copy_sample_documents():
+def _copy_assets(resource_tree, destination: Path, allowed_suffixes: Optional[Iterable[str]] = None, *, force: bool = False) -> None:
+ """Copy packaged assets into destination and optionally overwrite existing files.
+
+ When ``force`` is True, files are refreshed if the packaged bytes differ.
+ """
+ destination.mkdir(parents=True, exist_ok=True)
+
+ for resource in resource_tree.iterdir():
+ target_path = destination / resource.name
+
+ if resource.is_dir():
+ _copy_assets(resource, target_path, allowed_suffixes, force=force)
+ continue
+
+ if allowed_suffixes and not any(resource.name.endswith(suffix) for suffix in allowed_suffixes):
+ continue
+ resource_bytes = resource.read_bytes()
+
+ if target_path.exists():
+ if not force:
+ continue
+
+ try:
+ if target_path.read_bytes() == resource_bytes:
+ continue
+ except Exception as read_error:
+ logger.debug(f"Failed to read existing asset {target_path}: {read_error}")
+
+ target_path.write_bytes(resource_bytes)
+ logger.info(f"Copied bundled asset: {target_path}")
+
+
+def copy_sample_documents(*, force: bool = False) -> None:
"""Copy sample documents from package to current directory if they don't exist."""
documents_dir = Path("documents")
- # Check if documents directory already exists and has files
- if documents_dir.exists() and any(documents_dir.glob("*.pdf")):
- return # Documents already exist, don't overwrite
-
try:
- # Get sample documents from package assets
assets_files = files("tui._assets.documents")
-
- # Create documents directory if it doesn't exist
- documents_dir.mkdir(exist_ok=True)
-
- # Copy each sample document
- for resource in assets_files.iterdir():
- if resource.is_file() and resource.name.endswith('.pdf'):
- dest_path = documents_dir / resource.name
- if not dest_path.exists():
- content = resource.read_bytes()
- dest_path.write_bytes(content)
- logger.info(f"Copied sample document: {resource.name}")
-
+ _copy_assets(assets_files, documents_dir, allowed_suffixes=(".pdf",), force=force)
except Exception as e:
logger.debug(f"Could not copy sample documents: {e}")
# This is not a critical error - the app can work without sample documents
+def copy_sample_flows(*, force: bool = False) -> None:
+ """Copy sample flows from package to current directory if they don't exist."""
+ flows_dir = Path("flows")
+
+ try:
+ assets_files = files("tui._assets.flows")
+ _copy_assets(assets_files, flows_dir, allowed_suffixes=(".json",), force=force)
+ except Exception as e:
+ logger.debug(f"Could not copy sample flows: {e}")
+ # The app can proceed without bundled flows
+
+
+def copy_compose_files(*, force: bool = False) -> None:
+ """Copy docker-compose templates into the workspace if they are missing."""
+ try:
+ assets_root = files("tui._assets")
+ except Exception as e:
+ logger.debug(f"Could not access compose assets: {e}")
+ return
+
+ for filename in ("docker-compose.yml", "docker-compose-cpu.yml"):
+ destination = Path(filename)
+ if destination.exists() and not force:
+ continue
+
+ try:
+ resource = assets_root.joinpath(filename)
+ if not resource.is_file():
+ logger.debug(f"Compose template not found in assets: {filename}")
+ continue
+
+ resource_bytes = resource.read_bytes()
+ if destination.exists():
+ try:
+ if destination.read_bytes() == resource_bytes:
+ continue
+ except Exception as read_error:
+ logger.debug(f"Failed to read existing compose file {destination}: {read_error}")
+
+ destination.write_bytes(resource_bytes)
+ logger.info(f"Copied docker-compose template: {filename}")
+ except Exception as error:
+ logger.debug(f"Could not copy compose file {filename}: {error}")
+
+
def run_tui():
"""Run the OpenRAG TUI application."""
app = None
try:
- # Copy sample documents on first run
- copy_sample_documents()
+ # Keep bundled assets aligned with the packaged versions
+ copy_sample_documents(force=True)
+ copy_sample_flows(force=True)
+ copy_compose_files(force=True)
app = OpenRAGTUI()
app.run()
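
Taken together, `run_tui()` now re-syncs every bundled asset on each launch. A sketch of the call path, using the helper defined in this diff (the import location is an assumption):

```python
from importlib.resources import files
from pathlib import Path

from tui.main import _copy_assets  # helper added in the diff above

# force=True refreshes workspace copies whenever the packaged bytes differ,
# so the symlinked files under src/tui/_assets stay authoritative.
_copy_assets(files("tui._assets.documents"), Path("documents"),
             allowed_suffixes=(".pdf",), force=True)
_copy_assets(files("tui._assets.flows"), Path("flows"),
             allowed_suffixes=(".json",), force=True)
```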
diff --git a/src/tui/screens/welcome.py b/src/tui/screens/welcome.py
index 217b0611..ea85de9e 100644
--- a/src/tui/screens/welcome.py
+++ b/src/tui/screens/welcome.py
@@ -10,6 +10,7 @@ from rich.text import Text
from rich.align import Align
from dotenv import load_dotenv
+from .. import __version__
from ..managers.container_manager import ContainerManager, ServiceStatus
from ..managers.env_manager import EnvManager
from ..managers.docling_manager import DoclingManager
@@ -116,7 +117,8 @@ class WelcomeScreen(Screen):
[ASCII-art banner row; box-drawing glyphs lost to encoding]
"""
welcome_text.append(ascii_art, style="bold white")
- welcome_text.append("Terminal User Interface for OpenRAG\n\n", style="dim")
+ welcome_text.append("Terminal User Interface for OpenRAG\n", style="dim")
+ welcome_text.append(f"v{__version__}\n\n", style="dim cyan")
# Check if all services are running
all_services_running = self.services_running and self.docling_running
diff --git a/src/utils/container_utils.py b/src/utils/container_utils.py
index 14222c84..746379e8 100644
--- a/src/utils/container_utils.py
+++ b/src/utils/container_utils.py
@@ -157,10 +157,22 @@ def guess_host_ip_for_containers(logger=None) -> str:
import logging
import re
import shutil
+ import socket
import subprocess
log = logger or logging.getLogger(__name__)
+ def can_bind_to_address(ip_addr: str) -> bool:
+ """Test if we can bind to the given IP address."""
+ try:
+ with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
+ sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+ sock.bind((ip_addr, 0)) # Port 0 = let OS choose a free port
+ return True
+ except (OSError, socket.error) as e:
+ log.debug("Cannot bind to %s: %s", ip_addr, e)
+ return False
+
def run(cmd, timeout=2, text=True):
return subprocess.run(cmd, capture_output=True, text=text, timeout=timeout)
@@ -261,10 +273,23 @@ def guess_host_ip_for_containers(logger=None) -> str:
"Container-reachable host IP candidates: %s",
", ".join(ordered_candidates),
)
- else:
- log.info("Container-reachable host IP: %s", ordered_candidates[0])
- return ordered_candidates[0]
+ # Try each candidate and return the first one we can bind to
+ for ip_addr in ordered_candidates:
+ if can_bind_to_address(ip_addr):
+ if len(ordered_candidates) > 1:
+ log.info("Selected bindable host IP: %s", ip_addr)
+ else:
+ log.info("Container-reachable host IP: %s", ip_addr)
+ return ip_addr
+ log.debug("Skipping %s (cannot bind)", ip_addr)
+
+ # None of the candidates were bindable, fall back to 127.0.0.1
+ log.warning(
+ "None of the discovered IPs (%s) can be bound; falling back to 127.0.0.1",
+ ", ".join(ordered_candidates),
+ )
+ return "127.0.0.1"
log.warning(
"No container bridge IP found. For rootless Podman (slirp4netns) there may be no host bridge; publish ports or use 10.0.2.2 from the container."
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 00000000..5f19b37d
--- /dev/null
+++ b/tests/__init__.py
@@ -0,0 +1 @@
+# Test package
\ No newline at end of file
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 00000000..7c2ffc1d
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,85 @@
+import asyncio
+import os
+import tempfile
+from pathlib import Path
+
+import pytest
+import pytest_asyncio
+from dotenv import load_dotenv
+
+# Load environment variables
+load_dotenv()
+
+# Force no-auth mode for testing by setting OAuth credentials to empty strings
+# This ensures anonymous JWT tokens are created automatically
+os.environ['GOOGLE_OAUTH_CLIENT_ID'] = ''
+os.environ['GOOGLE_OAUTH_CLIENT_SECRET'] = ''
+
+from src.config.settings import clients
+from src.session_manager import SessionManager
+from src.main import generate_jwt_keys
+
+
+@pytest.fixture(scope="session")
+def event_loop():
+ """Create an instance of the default event loop for the test session."""
+ loop = asyncio.get_event_loop_policy().new_event_loop()
+ yield loop
+ loop.close()
+
+
+@pytest_asyncio.fixture
+async def opensearch_client():
+ """OpenSearch client for testing - requires running OpenSearch."""
+ await clients.initialize()
+ yield clients.opensearch
+ # Cleanup test indices after tests
+ try:
+ await clients.opensearch.indices.delete(index="test_documents")
+ except Exception:
+ pass
+
+
+@pytest.fixture
+def session_manager():
+ """Session manager for testing."""
+ # Generate RSA keys before creating SessionManager
+ generate_jwt_keys()
+ sm = SessionManager("test-secret-key")
+ print(f"[DEBUG] SessionManager created with keys: private={sm.private_key_path}, public={sm.public_key_path}")
+ return sm
+
+
+@pytest.fixture
+def test_documents_dir():
+ """Create a temporary directory with test documents."""
+ with tempfile.TemporaryDirectory() as temp_dir:
+ test_dir = Path(temp_dir)
+
+ # Create some test files in supported formats
+ (test_dir / "test1.md").write_text("# Machine Learning Document\n\nThis is a test document about machine learning.")
+ (test_dir / "test2.md").write_text("# AI Document\n\nAnother document discussing artificial intelligence.")
+ (test_dir / "test3.md").write_text("# Data Science Document\n\nThis is a markdown file about data science.")
+
+ # Create subdirectory with files
+ sub_dir = test_dir / "subdir"
+ sub_dir.mkdir()
+ (sub_dir / "nested.md").write_text("# Neural Networks\n\nNested document about neural networks.")
+
+ yield test_dir
+
+
+@pytest.fixture
+def test_single_file():
+ """Create a single test file."""
+ with tempfile.NamedTemporaryFile(mode='w', suffix='_test_document.md', delete=False) as f:
+ f.write("# Single Test Document\n\nThis is a test document about OpenRAG testing framework. This document contains multiple sentences to ensure proper chunking. The content should be indexed and searchable in OpenSearch after processing.")
+ temp_path = f.name
+
+ yield temp_path
+
+ # Cleanup
+ try:
+ os.unlink(temp_path)
+ except FileNotFoundError:
+ pass
\ No newline at end of file
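
A minimal, hypothetical consumer of these fixtures, to show the intended shapes (`opensearch_client` yields an initialized async client; `session_manager` generates RSA keys before constructing `SessionManager`):

```python
import pytest

@pytest.mark.asyncio
async def test_opensearch_reachable(opensearch_client):
    # The fixture has already awaited clients.initialize().
    info = await opensearch_client.info()
    assert "version" in info

def test_session_manager_has_keys(session_manager):
    # generate_jwt_keys() ran inside the fixture, so the paths exist.
    assert session_manager.private_key_path
    assert session_manager.public_key_path
```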
diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py
new file mode 100644
index 00000000..e27cd7ab
--- /dev/null
+++ b/tests/integration/__init__.py
@@ -0,0 +1 @@
+# Integration tests package
\ No newline at end of file
diff --git a/tests/integration/test_api_endpoints.py b/tests/integration/test_api_endpoints.py
new file mode 100644
index 00000000..869928fe
--- /dev/null
+++ b/tests/integration/test_api_endpoints.py
@@ -0,0 +1,296 @@
+import asyncio
+import os
+from pathlib import Path
+
+import httpx
+import pytest
+
+
+async def wait_for_service_ready(client: httpx.AsyncClient, timeout_s: float = 30.0):
+ """Poll existing endpoints until the app and OpenSearch are ready.
+
+ Strategy:
+ - GET /auth/me should return 200 immediately (confirms app is up).
+ - POST /search with query "*" avoids embeddings and checks OpenSearch/index readiness.
+ """
+ # First test OpenSearch JWT directly
+ from src.session_manager import SessionManager, AnonymousUser
+ import os
+ import hashlib
+ import jwt as jwt_lib
+ sm = SessionManager("test")
+ test_token = sm.create_jwt_token(AnonymousUser())
+ token_hash = hashlib.sha256(test_token.encode()).hexdigest()[:16]
+ print(f"[DEBUG] Generated test JWT token hash: {token_hash}")
+ print(f"[DEBUG] Using key paths: private={sm.private_key_path}, public={sm.public_key_path}")
+ with open(sm.public_key_path, 'rb') as f:
+ pub_key_hash = hashlib.sha256(f.read()).hexdigest()[:16]
+ print(f"[DEBUG] Public key hash: {pub_key_hash}")
+ # Decode token to see claims
+ decoded = jwt_lib.decode(test_token, options={"verify_signature": False})
+ print(f"[DEBUG] JWT claims: iss={decoded.get('iss')}, sub={decoded.get('sub')}, aud={decoded.get('aud')}, roles={decoded.get('roles')}")
+
+ # Test OpenSearch JWT auth directly
+ opensearch_url = f"https://{os.getenv('OPENSEARCH_HOST', 'localhost')}:{os.getenv('OPENSEARCH_PORT', '9200')}"
+ print(f"[DEBUG] Testing JWT auth directly against: {opensearch_url}/documents/_search")
+ async with httpx.AsyncClient(verify=False) as os_client:
+ r_os = await os_client.post(
+ f"{opensearch_url}/documents/_search",
+ headers={"Authorization": f"Bearer {test_token}"},
+ json={"query": {"match_all": {}}, "size": 0}
+ )
+ print(f"[DEBUG] Direct OpenSearch JWT test: status={r_os.status_code}, body={r_os.text[:500]}")
+ if r_os.status_code == 401:
+ print(f"[DEBUG] โ OpenSearch rejected JWT! OIDC config not working.")
+ else:
+ print(f"[DEBUG] โ OpenSearch accepted JWT!")
+
+ deadline = asyncio.get_event_loop().time() + timeout_s
+ last_err = None
+ while asyncio.get_event_loop().time() < deadline:
+ try:
+ r1 = await client.get("/auth/me")
+ print(f"[DEBUG] /auth/me status={r1.status_code}, body={r1.text[:200]}")
+ if r1.status_code in (401, 403):
+ raise AssertionError(f"/auth/me returned {r1.status_code}: {r1.text}")
+ if r1.status_code != 200:
+ await asyncio.sleep(0.5)
+ continue
+ # match_all readiness probe; no embeddings
+ r2 = await client.post("/search", json={"query": "*", "limit": 0})
+ print(f"[DEBUG] /search status={r2.status_code}, body={r2.text[:200]}")
+ if r2.status_code in (401, 403):
+ print(f"[DEBUG] Search failed with auth error. Response: {r2.text}")
+ raise AssertionError(f"/search returned {r2.status_code}: {r2.text}")
+ if r2.status_code == 200:
+ print("[DEBUG] Service ready!")
+ return
+ last_err = r2.text
+ except AssertionError:
+ raise
+ except Exception as e:
+ last_err = str(e)
+ print(f"[DEBUG] Exception during readiness check: {e}")
+ await asyncio.sleep(0.5)
+ raise AssertionError(f"Service not ready in time: {last_err}")
+
+
+@pytest.mark.parametrize("disable_langflow_ingest", [True, False])
+@pytest.mark.asyncio
+async def test_upload_and_search_endpoint(tmp_path: Path, disable_langflow_ingest: bool):
+ """Boot the ASGI app and exercise /upload and /search endpoints."""
+ # Ensure we route uploads to traditional processor and disable startup ingest
+ os.environ["DISABLE_INGEST_WITH_LANGFLOW"] = "true" if disable_langflow_ingest else "false"
+ os.environ["DISABLE_STARTUP_INGEST"] = "true"
+ # Force no-auth mode so endpoints bypass authentication
+ os.environ["GOOGLE_OAUTH_CLIENT_ID"] = ""
+ os.environ["GOOGLE_OAUTH_CLIENT_SECRET"] = ""
+
+ # Import after env vars to ensure settings pick them up. Clear cached modules
+ import sys
+ # Clear cached modules so settings pick up env and router sees new flag
+ for mod in [
+ "src.api.router",
+ "api.router", # Also clear the non-src path
+ "src.api.connector_router",
+ "api.connector_router",
+ "src.config.settings",
+ "config.settings",
+ "src.auth_middleware",
+ "auth_middleware",
+ "src.main",
+ "api", # Clear the api package itself
+ "src.api",
+ "services", # Clear services that import clients
+ "src.services",
+ "services.search_service",
+ "src.services.search_service",
+ ]:
+ sys.modules.pop(mod, None)
+ from src.main import create_app, startup_tasks
+ import src.api.router as upload_router
+ from src.config.settings import clients, INDEX_NAME, DISABLE_INGEST_WITH_LANGFLOW
+
+ # Ensure a clean index before startup
+ await clients.initialize()
+ try:
+ await clients.opensearch.indices.delete(index=INDEX_NAME)
+ # Wait for deletion to complete
+ await asyncio.sleep(1)
+ except Exception:
+ pass
+
+ app = await create_app()
+ # Manually run startup tasks since httpx ASGI transport here doesn't manage lifespan
+ await startup_tasks(app.state.services)
+
+ # Ensure index exists for tests (startup_tasks only creates it if DISABLE_INGEST_WITH_LANGFLOW=True)
+ from src.main import _ensure_opensearch_index
+ await _ensure_opensearch_index()
+
+ # Verify index is truly empty after startup
+ try:
+ count_response = await clients.opensearch.count(index=INDEX_NAME)
+ doc_count = count_response.get('count', 0)
+ assert doc_count == 0, f"Index should be empty after startup but contains {doc_count} documents"
+ except Exception as e:
+ # If count fails, the index might not exist yet, which is fine
+ pass
+
+ transport = httpx.ASGITransport(app=app)
+ try:
+ async with httpx.AsyncClient(transport=transport, base_url="http://testserver") as client:
+ # Wait for app + OpenSearch readiness using existing endpoints
+ await wait_for_service_ready(client)
+
+ # Create a temporary markdown file to upload
+ file_path = tmp_path / "endpoint_test_doc.md"
+ file_text = (
+ "# Single Test Document\n\n"
+ "This is a test document about OpenRAG testing framework. "
+ "The content should be indexed and searchable in OpenSearch after processing."
+ )
+ file_path.write_text(file_text)
+
+ # POST via router (multipart)
+ files = {
+ "file": (
+ file_path.name,
+ file_path.read_bytes(),
+ "text/markdown",
+ )
+ }
+ upload_resp = await client.post("/upload", files=files)
+ body = upload_resp.json()
+ assert upload_resp.status_code == 201, upload_resp.text
+ assert body.get("status") in {"indexed", "unchanged"}
+ assert isinstance(body.get("id"), str)
+
+ # Poll search for the specific content until it's indexed
+ async def _wait_for_indexed(timeout_s: float = 30.0):
+ deadline = asyncio.get_event_loop().time() + timeout_s
+ resp = None
+ while asyncio.get_event_loop().time() < deadline:
+ resp = await client.post(
+ "/search",
+ json={"query": "OpenRAG testing framework", "limit": 5},
+ )
+ if resp.status_code == 200 and resp.json().get("results"):
+ return resp
+ await asyncio.sleep(0.5)
+ return resp
+
+ search_resp = await _wait_for_indexed()
+
+ # POST /search
+ assert search_resp.status_code == 200, search_resp.text
+ search_body = search_resp.json()
+
+ # Basic shape: results must be a list (it may legitimately be empty)
+ assert isinstance(search_body.get("results"), list)
+ # When hits exist, confirm our phrase is present in top result content
+ if search_body["results"]:
+ top = search_body["results"][0]
+ assert "text" in top or "content" in top
+ text = top.get("text") or top.get("content")
+ assert isinstance(text, str)
+ assert "testing" in text.lower()
+ finally:
+ # Explicitly close global clients to avoid aiohttp warnings
+ from src.config.settings import clients
+ try:
+ await clients.close()
+ except Exception:
+ pass
+
+
+@pytest.mark.parametrize("disable_langflow_ingest", [True, False])
+@pytest.mark.asyncio
+async def test_router_upload_ingest_traditional(tmp_path: Path, disable_langflow_ingest: bool):
+ """Exercise the router endpoint to ensure it routes to traditional upload when Langflow ingest is disabled."""
+ os.environ["DISABLE_INGEST_WITH_LANGFLOW"] = "true" if disable_langflow_ingest else "false"
+ os.environ["DISABLE_STARTUP_INGEST"] = "true"
+ os.environ["GOOGLE_OAUTH_CLIENT_ID"] = ""
+ os.environ["GOOGLE_OAUTH_CLIENT_SECRET"] = ""
+
+ import sys
+ for mod in [
+ "src.api.router",
+ "api.router", # Also clear the non-src path
+ "src.api.connector_router",
+ "api.connector_router",
+ "src.config.settings",
+ "config.settings",
+ "src.auth_middleware",
+ "auth_middleware",
+ "src.main",
+ "api", # Clear the api package itself
+ "src.api",
+ "services", # Clear services that import clients
+ "src.services",
+ "services.search_service",
+ "src.services.search_service",
+ ]:
+ sys.modules.pop(mod, None)
+ from src.main import create_app, startup_tasks
+ import src.api.router as upload_router
+ from src.config.settings import clients, INDEX_NAME, DISABLE_INGEST_WITH_LANGFLOW
+
+ # Ensure a clean index before startup
+ await clients.initialize()
+ try:
+ await clients.opensearch.indices.delete(index=INDEX_NAME)
+ # Wait for deletion to complete
+ await asyncio.sleep(1)
+ except Exception:
+ pass
+
+ app = await create_app()
+ await startup_tasks(app.state.services)
+
+ # Ensure index exists for tests (startup_tasks only creates it if DISABLE_INGEST_WITH_LANGFLOW=True)
+ from src.main import _ensure_opensearch_index
+ await _ensure_opensearch_index()
+
+ # Verify index is truly empty after startup
+ try:
+ count_response = await clients.opensearch.count(index=INDEX_NAME)
+ doc_count = count_response.get('count', 0)
+ assert doc_count == 0, f"Index should be empty after startup but contains {doc_count} documents"
+ except Exception as e:
+ # If count fails, the index might not exist yet, which is fine
+ pass
+ transport = httpx.ASGITransport(app=app)
+ try:
+ async with httpx.AsyncClient(transport=transport, base_url="http://testserver") as client:
+ await wait_for_service_ready(client)
+
+ file_path = tmp_path / "router_test_doc.md"
+ file_path.write_text("# Router Test\n\nThis file validates the upload router.")
+
+ files = {
+ "file": (
+ file_path.name,
+ file_path.read_bytes(),
+ "text/markdown",
+ )
+ }
+
+ resp = await client.post("/router/upload_ingest", files=files)
+ data = resp.json()
+
+ print(f"data: {data}")
+ if disable_langflow_ingest:
+ assert resp.status_code in (201, 202), resp.text
+ assert data.get("status") in {"indexed", "unchanged"}
+ assert isinstance(data.get("id"), str)
+ else:
+ assert resp.status_code in (201, 202), resp.text
+ assert isinstance(data.get("task_id"), str)
+ assert data.get("file_count") == 1
+ finally:
+ from src.config.settings import clients
+ try:
+ await clients.close()
+ except Exception:
+ pass
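
The `sys.modules` purge used in both tests above is the standard way to make env-dependent module globals re-evaluate; a distilled sketch of the pattern:

```python
import importlib
import os
import sys

os.environ["DISABLE_INGEST_WITH_LANGFLOW"] = "true"

# Settings computed at import time (e.g. flags read via os.getenv at module
# scope) are frozen in the cached module; dropping the cache entry forces
# re-evaluation on the next import.
sys.modules.pop("src.config.settings", None)
settings = importlib.import_module("src.config.settings")
```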
diff --git a/tests/integration/test_startup_ingest.py b/tests/integration/test_startup_ingest.py
new file mode 100644
index 00000000..b2243b33
--- /dev/null
+++ b/tests/integration/test_startup_ingest.py
@@ -0,0 +1,118 @@
+import asyncio
+import os
+from pathlib import Path
+
+import httpx
+import pytest
+
+
+async def wait_for_ready(client: httpx.AsyncClient, timeout_s: float = 30.0):
+ deadline = asyncio.get_event_loop().time() + timeout_s
+ last_err = None
+ while asyncio.get_event_loop().time() < deadline:
+ try:
+ r1 = await client.get("/auth/me")
+ if r1.status_code != 200:
+ await asyncio.sleep(0.5)
+ continue
+ r2 = await client.post("/search", json={"query": "*", "limit": 0})
+ if r2.status_code == 200:
+ return
+ last_err = r2.text
+ except Exception as e:
+ last_err = str(e)
+ await asyncio.sleep(0.5)
+ raise AssertionError(f"Service not ready in time: {last_err}")
+
+
+def count_files_in_documents() -> int:
+ base_dir = Path(os.getcwd()) / "documents"
+ if not base_dir.is_dir():
+ return 0
+ return sum(1 for path in base_dir.rglob("*") if path.is_file())
+
+
+@pytest.mark.parametrize("disable_langflow_ingest", [True, False])
+@pytest.mark.asyncio
+async def test_startup_ingest_creates_task(disable_langflow_ingest: bool):
+ # Ensure startup ingest runs and choose pipeline per param
+ os.environ["DISABLE_STARTUP_INGEST"] = "false"
+ os.environ["DISABLE_INGEST_WITH_LANGFLOW"] = (
+ "true" if disable_langflow_ingest else "false"
+ )
+ # Force no-auth mode for simpler endpoint access
+ os.environ["GOOGLE_OAUTH_CLIENT_ID"] = ""
+ os.environ["GOOGLE_OAUTH_CLIENT_SECRET"] = ""
+
+ # Reload settings to pick up env for this test run
+ import sys
+
+ for mod in [
+ "src.api.router",
+ "src.api.connector_router",
+ "src.config.settings",
+ "src.auth_middleware",
+ "src.main",
+ ]:
+ sys.modules.pop(mod, None)
+
+ from src.main import create_app, startup_tasks
+ from src.config.settings import clients, INDEX_NAME
+
+ # Ensure a clean index before startup
+ await clients.initialize()
+ try:
+ await clients.opensearch.indices.delete(index=INDEX_NAME)
+ except Exception:
+ pass
+
+ app = await create_app()
+ # Trigger startup tasks explicitly
+ await startup_tasks(app.state.services)
+
+ # Ensure index exists for tests (startup_tasks only creates it if DISABLE_INGEST_WITH_LANGFLOW=True)
+ from src.main import _ensure_opensearch_index
+ await _ensure_opensearch_index()
+
+ transport = httpx.ASGITransport(app=app)
+ try:
+ async with httpx.AsyncClient(transport=transport, base_url="http://testserver") as client:
+ await wait_for_ready(client)
+
+ expected_files = count_files_in_documents()
+
+ # Poll /tasks until we see at least one startup ingest task
+ async def _wait_for_task(timeout_s: float = 60.0):
+ deadline = asyncio.get_event_loop().time() + timeout_s
+ last = None
+ while asyncio.get_event_loop().time() < deadline:
+ resp = await client.get("/tasks")
+ if resp.status_code == 200:
+ data = resp.json()
+ last = data
+ tasks = data.get("tasks") if isinstance(data, dict) else None
+ if isinstance(tasks, list) and len(tasks) > 0:
+ return tasks
+ await asyncio.sleep(0.5)
+ return last.get("tasks") if isinstance(last, dict) else last
+
+ tasks = await _wait_for_task()
+ if expected_files == 0:
+ return # Nothing to do
+ if not (isinstance(tasks, list) and len(tasks) > 0):
+ # Fallback: verify that documents were indexed as a sign of startup ingest
+ sr = await client.post("/search", json={"query": "*", "limit": 1})
+ assert sr.status_code == 200, sr.text
+ total = sr.json().get("total")
+ assert isinstance(total, int) and total > 0, "Startup ingest did not index documents"
+ return
+ newest = tasks[0]
+ assert "task_id" in newest
+ assert newest.get("total_files") == expected_files
+ finally:
+ # Explicitly close global clients to avoid aiohttp warnings
+ from src.config.settings import clients
+ try:
+ await clients.close()
+ except Exception:
+ pass
diff --git a/uv.lock b/uv.lock
index c9bc6714..14f76a60 100644
--- a/uv.lock
+++ b/uv.lock
@@ -2,10 +2,10 @@ version = 1
revision = 2
requires-python = ">=3.13"
resolution-markers = [
- "sys_platform == 'darwin'",
- "platform_machine == 'aarch64' and sys_platform == 'linux'",
"platform_machine == 'x86_64' and sys_platform == 'linux'",
+ "platform_machine == 'aarch64' and sys_platform == 'linux'",
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
+ "sys_platform == 'darwin'",
]
[[package]]
@@ -291,8 +291,8 @@ name = "click"
version = "8.2.1"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
- "platform_machine == 'aarch64' and sys_platform == 'linux'",
"platform_machine == 'x86_64' and sys_platform == 'linux'",
+ "platform_machine == 'aarch64' and sys_platform == 'linux'",
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
]
dependencies = [
@@ -312,6 +312,67 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" },
]
+[[package]]
+name = "coverage"
+version = "7.10.7"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/51/26/d22c300112504f5f9a9fd2297ce33c35f3d353e4aeb987c8419453b2a7c2/coverage-7.10.7.tar.gz", hash = "sha256:f4ab143ab113be368a3e9b795f9cd7906c5ef407d6173fe9675a902e1fffc239", size = 827704, upload-time = "2025-09-21T20:03:56.815Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/9a/94/b765c1abcb613d103b64fcf10395f54d69b0ef8be6a0dd9c524384892cc7/coverage-7.10.7-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:981a651f543f2854abd3b5fcb3263aac581b18209be49863ba575de6edf4c14d", size = 218320, upload-time = "2025-09-21T20:01:56.629Z" },
+ { url = "https://files.pythonhosted.org/packages/72/4f/732fff31c119bb73b35236dd333030f32c4bfe909f445b423e6c7594f9a2/coverage-7.10.7-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:73ab1601f84dc804f7812dc297e93cd99381162da39c47040a827d4e8dafe63b", size = 218575, upload-time = "2025-09-21T20:01:58.203Z" },
+ { url = "https://files.pythonhosted.org/packages/87/02/ae7e0af4b674be47566707777db1aa375474f02a1d64b9323e5813a6cdd5/coverage-7.10.7-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:a8b6f03672aa6734e700bbcd65ff050fd19cddfec4b031cc8cf1c6967de5a68e", size = 249568, upload-time = "2025-09-21T20:01:59.748Z" },
+ { url = "https://files.pythonhosted.org/packages/a2/77/8c6d22bf61921a59bce5471c2f1f7ac30cd4ac50aadde72b8c48d5727902/coverage-7.10.7-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:10b6ba00ab1132a0ce4428ff68cf50a25efd6840a42cdf4239c9b99aad83be8b", size = 252174, upload-time = "2025-09-21T20:02:01.192Z" },
+ { url = "https://files.pythonhosted.org/packages/b1/20/b6ea4f69bbb52dac0aebd62157ba6a9dddbfe664f5af8122dac296c3ee15/coverage-7.10.7-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c79124f70465a150e89340de5963f936ee97097d2ef76c869708c4248c63ca49", size = 253447, upload-time = "2025-09-21T20:02:02.701Z" },
+ { url = "https://files.pythonhosted.org/packages/f9/28/4831523ba483a7f90f7b259d2018fef02cb4d5b90bc7c1505d6e5a84883c/coverage-7.10.7-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:69212fbccdbd5b0e39eac4067e20a4a5256609e209547d86f740d68ad4f04911", size = 249779, upload-time = "2025-09-21T20:02:04.185Z" },
+ { url = "https://files.pythonhosted.org/packages/a7/9f/4331142bc98c10ca6436d2d620c3e165f31e6c58d43479985afce6f3191c/coverage-7.10.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7ea7c6c9d0d286d04ed3541747e6597cbe4971f22648b68248f7ddcd329207f0", size = 251604, upload-time = "2025-09-21T20:02:06.034Z" },
+ { url = "https://files.pythonhosted.org/packages/ce/60/bda83b96602036b77ecf34e6393a3836365481b69f7ed7079ab85048202b/coverage-7.10.7-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b9be91986841a75042b3e3243d0b3cb0b2434252b977baaf0cd56e960fe1e46f", size = 249497, upload-time = "2025-09-21T20:02:07.619Z" },
+ { url = "https://files.pythonhosted.org/packages/5f/af/152633ff35b2af63977edd835d8e6430f0caef27d171edf2fc76c270ef31/coverage-7.10.7-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:b281d5eca50189325cfe1f365fafade89b14b4a78d9b40b05ddd1fc7d2a10a9c", size = 249350, upload-time = "2025-09-21T20:02:10.34Z" },
+ { url = "https://files.pythonhosted.org/packages/9d/71/d92105d122bd21cebba877228990e1646d862e34a98bb3374d3fece5a794/coverage-7.10.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:99e4aa63097ab1118e75a848a28e40d68b08a5e19ce587891ab7fd04475e780f", size = 251111, upload-time = "2025-09-21T20:02:12.122Z" },
+ { url = "https://files.pythonhosted.org/packages/a2/9e/9fdb08f4bf476c912f0c3ca292e019aab6712c93c9344a1653986c3fd305/coverage-7.10.7-cp313-cp313-win32.whl", hash = "sha256:dc7c389dce432500273eaf48f410b37886be9208b2dd5710aaf7c57fd442c698", size = 220746, upload-time = "2025-09-21T20:02:13.919Z" },
+ { url = "https://files.pythonhosted.org/packages/b1/b1/a75fd25df44eab52d1931e89980d1ada46824c7a3210be0d3c88a44aaa99/coverage-7.10.7-cp313-cp313-win_amd64.whl", hash = "sha256:cac0fdca17b036af3881a9d2729a850b76553f3f716ccb0360ad4dbc06b3b843", size = 221541, upload-time = "2025-09-21T20:02:15.57Z" },
+ { url = "https://files.pythonhosted.org/packages/14/3a/d720d7c989562a6e9a14b2c9f5f2876bdb38e9367126d118495b89c99c37/coverage-7.10.7-cp313-cp313-win_arm64.whl", hash = "sha256:4b6f236edf6e2f9ae8fcd1332da4e791c1b6ba0dc16a2dc94590ceccb482e546", size = 220170, upload-time = "2025-09-21T20:02:17.395Z" },
+ { url = "https://files.pythonhosted.org/packages/bb/22/e04514bf2a735d8b0add31d2b4ab636fc02370730787c576bb995390d2d5/coverage-7.10.7-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:a0ec07fd264d0745ee396b666d47cef20875f4ff2375d7c4f58235886cc1ef0c", size = 219029, upload-time = "2025-09-21T20:02:18.936Z" },
+ { url = "https://files.pythonhosted.org/packages/11/0b/91128e099035ece15da3445d9015e4b4153a6059403452d324cbb0a575fa/coverage-7.10.7-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:dd5e856ebb7bfb7672b0086846db5afb4567a7b9714b8a0ebafd211ec7ce6a15", size = 219259, upload-time = "2025-09-21T20:02:20.44Z" },
+ { url = "https://files.pythonhosted.org/packages/8b/51/66420081e72801536a091a0c8f8c1f88a5c4bf7b9b1bdc6222c7afe6dc9b/coverage-7.10.7-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:f57b2a3c8353d3e04acf75b3fed57ba41f5c0646bbf1d10c7c282291c97936b4", size = 260592, upload-time = "2025-09-21T20:02:22.313Z" },
+ { url = "https://files.pythonhosted.org/packages/5d/22/9b8d458c2881b22df3db5bb3e7369e63d527d986decb6c11a591ba2364f7/coverage-7.10.7-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:1ef2319dd15a0b009667301a3f84452a4dc6fddfd06b0c5c53ea472d3989fbf0", size = 262768, upload-time = "2025-09-21T20:02:24.287Z" },
+ { url = "https://files.pythonhosted.org/packages/f7/08/16bee2c433e60913c610ea200b276e8eeef084b0d200bdcff69920bd5828/coverage-7.10.7-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:83082a57783239717ceb0ad584de3c69cf581b2a95ed6bf81ea66034f00401c0", size = 264995, upload-time = "2025-09-21T20:02:26.133Z" },
+ { url = "https://files.pythonhosted.org/packages/20/9d/e53eb9771d154859b084b90201e5221bca7674ba449a17c101a5031d4054/coverage-7.10.7-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:50aa94fb1fb9a397eaa19c0d5ec15a5edd03a47bf1a3a6111a16b36e190cff65", size = 259546, upload-time = "2025-09-21T20:02:27.716Z" },
+ { url = "https://files.pythonhosted.org/packages/ad/b0/69bc7050f8d4e56a89fb550a1577d5d0d1db2278106f6f626464067b3817/coverage-7.10.7-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:2120043f147bebb41c85b97ac45dd173595ff14f2a584f2963891cbcc3091541", size = 262544, upload-time = "2025-09-21T20:02:29.216Z" },
+ { url = "https://files.pythonhosted.org/packages/ef/4b/2514b060dbd1bc0aaf23b852c14bb5818f244c664cb16517feff6bb3a5ab/coverage-7.10.7-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:2fafd773231dd0378fdba66d339f84904a8e57a262f583530f4f156ab83863e6", size = 260308, upload-time = "2025-09-21T20:02:31.226Z" },
+ { url = "https://files.pythonhosted.org/packages/54/78/7ba2175007c246d75e496f64c06e94122bdb914790a1285d627a918bd271/coverage-7.10.7-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:0b944ee8459f515f28b851728ad224fa2d068f1513ef6b7ff1efafeb2185f999", size = 258920, upload-time = "2025-09-21T20:02:32.823Z" },
+ { url = "https://files.pythonhosted.org/packages/c0/b3/fac9f7abbc841409b9a410309d73bfa6cfb2e51c3fada738cb607ce174f8/coverage-7.10.7-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4b583b97ab2e3efe1b3e75248a9b333bd3f8b0b1b8e5b45578e05e5850dfb2c2", size = 261434, upload-time = "2025-09-21T20:02:34.86Z" },
+ { url = "https://files.pythonhosted.org/packages/ee/51/a03bec00d37faaa891b3ff7387192cef20f01604e5283a5fabc95346befa/coverage-7.10.7-cp313-cp313t-win32.whl", hash = "sha256:2a78cd46550081a7909b3329e2266204d584866e8d97b898cd7fb5ac8d888b1a", size = 221403, upload-time = "2025-09-21T20:02:37.034Z" },
+ { url = "https://files.pythonhosted.org/packages/53/22/3cf25d614e64bf6d8e59c7c669b20d6d940bb337bdee5900b9ca41c820bb/coverage-7.10.7-cp313-cp313t-win_amd64.whl", hash = "sha256:33a5e6396ab684cb43dc7befa386258acb2d7fae7f67330ebb85ba4ea27938eb", size = 222469, upload-time = "2025-09-21T20:02:39.011Z" },
+ { url = "https://files.pythonhosted.org/packages/49/a1/00164f6d30d8a01c3c9c48418a7a5be394de5349b421b9ee019f380df2a0/coverage-7.10.7-cp313-cp313t-win_arm64.whl", hash = "sha256:86b0e7308289ddde73d863b7683f596d8d21c7d8664ce1dee061d0bcf3fbb4bb", size = 220731, upload-time = "2025-09-21T20:02:40.939Z" },
+ { url = "https://files.pythonhosted.org/packages/23/9c/5844ab4ca6a4dd97a1850e030a15ec7d292b5c5cb93082979225126e35dd/coverage-7.10.7-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:b06f260b16ead11643a5a9f955bd4b5fd76c1a4c6796aeade8520095b75de520", size = 218302, upload-time = "2025-09-21T20:02:42.527Z" },
+ { url = "https://files.pythonhosted.org/packages/f0/89/673f6514b0961d1f0e20ddc242e9342f6da21eaba3489901b565c0689f34/coverage-7.10.7-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:212f8f2e0612778f09c55dd4872cb1f64a1f2b074393d139278ce902064d5b32", size = 218578, upload-time = "2025-09-21T20:02:44.468Z" },
+ { url = "https://files.pythonhosted.org/packages/05/e8/261cae479e85232828fb17ad536765c88dd818c8470aca690b0ac6feeaa3/coverage-7.10.7-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:3445258bcded7d4aa630ab8296dea4d3f15a255588dd535f980c193ab6b95f3f", size = 249629, upload-time = "2025-09-21T20:02:46.503Z" },
+ { url = "https://files.pythonhosted.org/packages/82/62/14ed6546d0207e6eda876434e3e8475a3e9adbe32110ce896c9e0c06bb9a/coverage-7.10.7-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:bb45474711ba385c46a0bfe696c695a929ae69ac636cda8f532be9e8c93d720a", size = 252162, upload-time = "2025-09-21T20:02:48.689Z" },
+ { url = "https://files.pythonhosted.org/packages/ff/49/07f00db9ac6478e4358165a08fb41b469a1b053212e8a00cb02f0d27a05f/coverage-7.10.7-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:813922f35bd800dca9994c5971883cbc0d291128a5de6b167c7aa697fcf59360", size = 253517, upload-time = "2025-09-21T20:02:50.31Z" },
+ { url = "https://files.pythonhosted.org/packages/a2/59/c5201c62dbf165dfbc91460f6dbbaa85a8b82cfa6131ac45d6c1bfb52deb/coverage-7.10.7-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:93c1b03552081b2a4423091d6fb3787265b8f86af404cff98d1b5342713bdd69", size = 249632, upload-time = "2025-09-21T20:02:51.971Z" },
+ { url = "https://files.pythonhosted.org/packages/07/ae/5920097195291a51fb00b3a70b9bbd2edbfe3c84876a1762bd1ef1565ebc/coverage-7.10.7-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:cc87dd1b6eaf0b848eebb1c86469b9f72a1891cb42ac7adcfbce75eadb13dd14", size = 251520, upload-time = "2025-09-21T20:02:53.858Z" },
+ { url = "https://files.pythonhosted.org/packages/b9/3c/a815dde77a2981f5743a60b63df31cb322c944843e57dbd579326625a413/coverage-7.10.7-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:39508ffda4f343c35f3236fe8d1a6634a51f4581226a1262769d7f970e73bffe", size = 249455, upload-time = "2025-09-21T20:02:55.807Z" },
+ { url = "https://files.pythonhosted.org/packages/aa/99/f5cdd8421ea656abefb6c0ce92556709db2265c41e8f9fc6c8ae0f7824c9/coverage-7.10.7-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:925a1edf3d810537c5a3abe78ec5530160c5f9a26b1f4270b40e62cc79304a1e", size = 249287, upload-time = "2025-09-21T20:02:57.784Z" },
+ { url = "https://files.pythonhosted.org/packages/c3/7a/e9a2da6a1fc5d007dd51fca083a663ab930a8c4d149c087732a5dbaa0029/coverage-7.10.7-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2c8b9a0636f94c43cd3576811e05b89aa9bc2d0a85137affc544ae5cb0e4bfbd", size = 250946, upload-time = "2025-09-21T20:02:59.431Z" },
+ { url = "https://files.pythonhosted.org/packages/ef/5b/0b5799aa30380a949005a353715095d6d1da81927d6dbed5def2200a4e25/coverage-7.10.7-cp314-cp314-win32.whl", hash = "sha256:b7b8288eb7cdd268b0304632da8cb0bb93fadcfec2fe5712f7b9cc8f4d487be2", size = 221009, upload-time = "2025-09-21T20:03:01.324Z" },
+ { url = "https://files.pythonhosted.org/packages/da/b0/e802fbb6eb746de006490abc9bb554b708918b6774b722bb3a0e6aa1b7de/coverage-7.10.7-cp314-cp314-win_amd64.whl", hash = "sha256:1ca6db7c8807fb9e755d0379ccc39017ce0a84dcd26d14b5a03b78563776f681", size = 221804, upload-time = "2025-09-21T20:03:03.4Z" },
+ { url = "https://files.pythonhosted.org/packages/9e/e8/71d0c8e374e31f39e3389bb0bd19e527d46f00ea8571ec7ec8fd261d8b44/coverage-7.10.7-cp314-cp314-win_arm64.whl", hash = "sha256:097c1591f5af4496226d5783d036bf6fd6cd0cbc132e071b33861de756efb880", size = 220384, upload-time = "2025-09-21T20:03:05.111Z" },
+ { url = "https://files.pythonhosted.org/packages/62/09/9a5608d319fa3eba7a2019addeacb8c746fb50872b57a724c9f79f146969/coverage-7.10.7-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:a62c6ef0d50e6de320c270ff91d9dd0a05e7250cac2a800b7784bae474506e63", size = 219047, upload-time = "2025-09-21T20:03:06.795Z" },
+ { url = "https://files.pythonhosted.org/packages/f5/6f/f58d46f33db9f2e3647b2d0764704548c184e6f5e014bef528b7f979ef84/coverage-7.10.7-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:9fa6e4dd51fe15d8738708a973470f67a855ca50002294852e9571cdbd9433f2", size = 219266, upload-time = "2025-09-21T20:03:08.495Z" },
+ { url = "https://files.pythonhosted.org/packages/74/5c/183ffc817ba68e0b443b8c934c8795553eb0c14573813415bd59941ee165/coverage-7.10.7-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:8fb190658865565c549b6b4706856d6a7b09302c797eb2cf8e7fe9dabb043f0d", size = 260767, upload-time = "2025-09-21T20:03:10.172Z" },
+ { url = "https://files.pythonhosted.org/packages/0f/48/71a8abe9c1ad7e97548835e3cc1adbf361e743e9d60310c5f75c9e7bf847/coverage-7.10.7-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:affef7c76a9ef259187ef31599a9260330e0335a3011732c4b9effa01e1cd6e0", size = 262931, upload-time = "2025-09-21T20:03:11.861Z" },
+ { url = "https://files.pythonhosted.org/packages/84/fd/193a8fb132acfc0a901f72020e54be5e48021e1575bb327d8ee1097a28fd/coverage-7.10.7-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6e16e07d85ca0cf8bafe5f5d23a0b850064e8e945d5677492b06bbe6f09cc699", size = 265186, upload-time = "2025-09-21T20:03:13.539Z" },
+ { url = "https://files.pythonhosted.org/packages/b1/8f/74ecc30607dd95ad50e3034221113ccb1c6d4e8085cc761134782995daae/coverage-7.10.7-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:03ffc58aacdf65d2a82bbeb1ffe4d01ead4017a21bfd0454983b88ca73af94b9", size = 259470, upload-time = "2025-09-21T20:03:15.584Z" },
+ { url = "https://files.pythonhosted.org/packages/0f/55/79ff53a769f20d71b07023ea115c9167c0bb56f281320520cf64c5298a96/coverage-7.10.7-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1b4fd784344d4e52647fd7857b2af5b3fbe6c239b0b5fa63e94eb67320770e0f", size = 262626, upload-time = "2025-09-21T20:03:17.673Z" },
+ { url = "https://files.pythonhosted.org/packages/88/e2/dac66c140009b61ac3fc13af673a574b00c16efdf04f9b5c740703e953c0/coverage-7.10.7-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:0ebbaddb2c19b71912c6f2518e791aa8b9f054985a0769bdb3a53ebbc765c6a1", size = 260386, upload-time = "2025-09-21T20:03:19.36Z" },
+ { url = "https://files.pythonhosted.org/packages/a2/f1/f48f645e3f33bb9ca8a496bc4a9671b52f2f353146233ebd7c1df6160440/coverage-7.10.7-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:a2d9a3b260cc1d1dbdb1c582e63ddcf5363426a1a68faa0f5da28d8ee3c722a0", size = 258852, upload-time = "2025-09-21T20:03:21.007Z" },
+ { url = "https://files.pythonhosted.org/packages/bb/3b/8442618972c51a7affeead957995cfa8323c0c9bcf8fa5a027421f720ff4/coverage-7.10.7-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a3cc8638b2480865eaa3926d192e64ce6c51e3d29c849e09d5b4ad95efae5399", size = 261534, upload-time = "2025-09-21T20:03:23.12Z" },
+ { url = "https://files.pythonhosted.org/packages/b2/dc/101f3fa3a45146db0cb03f5b4376e24c0aac818309da23e2de0c75295a91/coverage-7.10.7-cp314-cp314t-win32.whl", hash = "sha256:67f8c5cbcd3deb7a60b3345dffc89a961a484ed0af1f6f73de91705cc6e31235", size = 221784, upload-time = "2025-09-21T20:03:24.769Z" },
+ { url = "https://files.pythonhosted.org/packages/4c/a1/74c51803fc70a8a40d7346660379e144be772bab4ac7bb6e6b905152345c/coverage-7.10.7-cp314-cp314t-win_amd64.whl", hash = "sha256:e1ed71194ef6dea7ed2d5cb5f7243d4bcd334bfb63e59878519be558078f848d", size = 222905, upload-time = "2025-09-21T20:03:26.93Z" },
+ { url = "https://files.pythonhosted.org/packages/12/65/f116a6d2127df30bcafbceef0302d8a64ba87488bf6f73a6d8eebf060873/coverage-7.10.7-cp314-cp314t-win_arm64.whl", hash = "sha256:7fe650342addd8524ca63d77b2362b02345e5f1a093266787d210c70a50b471a", size = 220922, upload-time = "2025-09-21T20:03:28.672Z" },
+ { url = "https://files.pythonhosted.org/packages/ec/16/114df1c291c22cac3b0c127a73e0af5c12ed7bbb6558d310429a0ae24023/coverage-7.10.7-py3-none-any.whl", hash = "sha256:f7941f6f2fe6dd6807a1208737b8a0cbcf1cc6d7b07d24998ad2d63590868260", size = 209952, upload-time = "2025-09-21T20:03:53.918Z" },
+]
+
[[package]]
name = "cramjam"
version = "2.11.0"
@@ -454,8 +515,8 @@ name = "dill"
version = "0.4.0"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
- "platform_machine == 'aarch64' and sys_platform == 'linux'",
"platform_machine == 'x86_64' and sys_platform == 'linux'",
+ "platform_machine == 'aarch64' and sys_platform == 'linux'",
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
]
sdist = { url = "https://files.pythonhosted.org/packages/12/80/630b4b88364e9a8c8c5797f4602d0f76ef820909ee32f0bacb9f90654042/dill-0.4.0.tar.gz", hash = "sha256:0633f1d2df477324f53a895b02c901fb961bdbf65a17122586ea7019292cbcf0", size = 186976, upload-time = "2025-04-16T00:41:48.867Z" }
@@ -619,8 +680,8 @@ name = "docling-mcp"
version = "1.1.0"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
- "platform_machine == 'aarch64' and sys_platform == 'linux'",
"platform_machine == 'x86_64' and sys_platform == 'linux'",
+ "platform_machine == 'aarch64' and sys_platform == 'linux'",
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
]
dependencies = [
@@ -943,8 +1004,8 @@ name = "fsspec"
version = "2025.5.1"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
- "platform_machine == 'aarch64' and sys_platform == 'linux'",
"platform_machine == 'x86_64' and sys_platform == 'linux'",
+ "platform_machine == 'aarch64' and sys_platform == 'linux'",
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
]
sdist = { url = "https://files.pythonhosted.org/packages/00/f7/27f15d41f0ed38e8fcc488584b57e902b331da7f7c6dcda53721b15838fc/fsspec-2025.5.1.tar.gz", hash = "sha256:2e55e47a540b91843b755e83ded97c6e897fa0942b11490113f09e9c443c2475", size = 303033, upload-time = "2025-05-24T12:03:23.792Z" }
@@ -1264,8 +1325,8 @@ name = "huggingface-hub"
version = "0.33.2"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
- "platform_machine == 'aarch64' and sys_platform == 'linux'",
"platform_machine == 'x86_64' and sys_platform == 'linux'",
+ "platform_machine == 'aarch64' and sys_platform == 'linux'",
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
]
dependencies = [
@@ -1339,6 +1400,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/20/b0/36bd937216ec521246249be3bf9855081de4c5e06a0c9b4219dbeda50373/importlib_metadata-8.7.0-py3-none-any.whl", hash = "sha256:e5dd1551894c77868a30651cef00984d50e1002d06942a7101d34870c5f02afd", size = 27656, upload-time = "2025-04-27T15:29:00.214Z" },
]

+[[package]]
+name = "iniconfig"
+version = "2.1.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/f2/97/ebf4da567aa6827c909642694d71c9fcf53e5b504f2d96afea02718862f3/iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7", size = 4793, upload-time = "2025-03-19T20:09:59.721Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050, upload-time = "2025-03-19T20:10:01.071Z" },
+]
+
[[package]]
name = "jinja2"
version = "3.1.6"
@@ -1960,8 +2030,8 @@ name = "multiprocess"
version = "0.70.18"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
- "platform_machine == 'aarch64' and sys_platform == 'linux'",
"platform_machine == 'x86_64' and sys_platform == 'linux'",
+ "platform_machine == 'aarch64' and sys_platform == 'linux'",
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
]
dependencies = [
@@ -2282,7 +2352,7 @@ wheels = [

[[package]]
name = "openrag"
-version = "0.1.14.dev3"
+version = "0.1.19"
source = { editable = "." }
dependencies = [
{ name = "agentd" },
@@ -2312,6 +2382,14 @@ dependencies = [
{ name = "uvicorn" },
]

+[package.dev-dependencies]
+dev = [
+ { name = "pytest" },
+ { name = "pytest-asyncio" },
+ { name = "pytest-cov" },
+ { name = "pytest-mock" },
+]
+
[package.metadata]
requires-dist = [
{ name = "agentd", specifier = ">=0.2.2" },
@@ -2341,6 +2419,14 @@ requires-dist = [
{ name = "uvicorn", specifier = ">=0.35.0" },
]

+[package.metadata.requires-dev]
+dev = [
+ { name = "pytest", specifier = ">=8" },
+ { name = "pytest-asyncio", specifier = ">=0.21.0" },
+ { name = "pytest-cov", specifier = ">=4.0.0" },
+ { name = "pytest-mock", specifier = ">=3.12.0" },
+]
+
[[package]]
name = "opensearch-py"
version = "3.0.0"
@@ -2836,6 +2922,60 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/e1/6b/2706497c86e8d69fb76afe5ea857fe1794621aa0f3b1d863feb953fe0f22/pypdfium2-4.30.1-py3-none-win_arm64.whl", hash = "sha256:c2b6d63f6d425d9416c08d2511822b54b8e3ac38e639fc41164b1d75584b3a8c", size = 2814810, upload-time = "2024-12-19T19:28:09.857Z" },
]

+[[package]]
+name = "pytest"
+version = "8.4.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "colorama", marker = "sys_platform == 'win32'" },
+ { name = "iniconfig" },
+ { name = "packaging" },
+ { name = "pluggy" },
+ { name = "pygments" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/a3/5c/00a0e072241553e1a7496d638deababa67c5058571567b92a7eaa258397c/pytest-8.4.2.tar.gz", hash = "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01", size = 1519618, upload-time = "2025-09-04T14:34:22.711Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/a8/a4/20da314d277121d6534b3a980b29035dcd51e6744bd79075a6ce8fa4eb8d/pytest-8.4.2-py3-none-any.whl", hash = "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79", size = 365750, upload-time = "2025-09-04T14:34:20.226Z" },
+]
+
+[[package]]
+name = "pytest-asyncio"
+version = "1.2.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "pytest" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/42/86/9e3c5f48f7b7b638b216e4b9e645f54d199d7abbbab7a64a13b4e12ba10f/pytest_asyncio-1.2.0.tar.gz", hash = "sha256:c609a64a2a8768462d0c99811ddb8bd2583c33fd33cf7f21af1c142e824ffb57", size = 50119, upload-time = "2025-09-12T07:33:53.816Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/04/93/2fa34714b7a4ae72f2f8dad66ba17dd9a2c793220719e736dda28b7aec27/pytest_asyncio-1.2.0-py3-none-any.whl", hash = "sha256:8e17ae5e46d8e7efe51ab6494dd2010f4ca8dae51652aa3c8d55acf50bfb2e99", size = 15095, upload-time = "2025-09-12T07:33:52.639Z" },
+]
+
+[[package]]
+name = "pytest-cov"
+version = "7.0.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "coverage" },
+ { name = "pluggy" },
+ { name = "pytest" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/5e/f7/c933acc76f5208b3b00089573cf6a2bc26dc80a8aece8f52bb7d6b1855ca/pytest_cov-7.0.0.tar.gz", hash = "sha256:33c97eda2e049a0c5298e91f519302a1334c26ac65c1a483d6206fd458361af1", size = 54328, upload-time = "2025-09-09T10:57:02.113Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/ee/49/1377b49de7d0c1ce41292161ea0f721913fa8722c19fb9c1e3aa0367eecb/pytest_cov-7.0.0-py3-none-any.whl", hash = "sha256:3b8e9558b16cc1479da72058bdecf8073661c7f57f7d3c5f22a1c23507f2d861", size = 22424, upload-time = "2025-09-09T10:57:00.695Z" },
+]
+
+[[package]]
+name = "pytest-mock"
+version = "3.15.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "pytest" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/68/14/eb014d26be205d38ad5ad20d9a80f7d201472e08167f0bb4361e251084a9/pytest_mock-3.15.1.tar.gz", hash = "sha256:1849a238f6f396da19762269de72cb1814ab44416fa73a8686deac10b0d87a0f", size = 34036, upload-time = "2025-09-16T16:37:27.081Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/5a/cc/06253936f4a7fa2e0f48dfe6d851d9c56df896a9ab09ac019d70b760619c/pytest_mock-3.15.1-py3-none-any.whl", hash = "sha256:0a25e2eb88fe5168d535041d09a4529a188176ae608a6d249ee65abc0949630d", size = 10095, upload-time = "2025-09-16T16:37:25.734Z" },
+]
+
[[package]]
name = "python-bidi"
version = "0.6.6"
@@ -3622,9 +3762,9 @@ name = "torch"
version = "2.8.0"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
- "sys_platform == 'darwin'",
"platform_machine == 'aarch64' and sys_platform == 'linux'",
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
+ "sys_platform == 'darwin'",
]
dependencies = [
{ name = "filelock", marker = "platform_machine != 'x86_64' or sys_platform != 'linux'" },
@@ -3669,9 +3809,9 @@ name = "torchvision"
version = "0.23.0"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
- "sys_platform == 'darwin'",
"platform_machine == 'aarch64' and sys_platform == 'linux'",
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
+ "sys_platform == 'darwin'",
]
dependencies = [
{ name = "numpy", marker = "platform_machine != 'x86_64' or sys_platform != 'linux'" },