From 38e3007964d2f8a026ea0ffe5fb4b06d64734cc9 Mon Sep 17 00:00:00 2001 From: phact Date: Thu, 11 Sep 2025 11:48:43 -0400 Subject: [PATCH 01/58] dead method --- src/services/task_service.py | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/src/services/task_service.py b/src/services/task_service.py index 8e69d4ae..c0d7ffad 100644 --- a/src/services/task_service.py +++ b/src/services/task_service.py @@ -109,33 +109,6 @@ class TaskService: return task_id - async def background_upload_processor(self, user_id: str, task_id: str) -> None: - """Background task to process all files in an upload job with concurrency control""" - try: - upload_task = self.task_store[user_id][task_id] - upload_task.status = TaskStatus.RUNNING - upload_task.updated_at = time.time() - - # Process files with limited concurrency to avoid overwhelming the system - max_workers = get_worker_count() - semaphore = asyncio.Semaphore(max_workers * 2) # Allow 2x process pool size for async I/O - - async def process_with_semaphore(file_path: str): - async with semaphore: - await self.document_service.process_single_file_task(upload_task, file_path) - - tasks = [process_with_semaphore(file_path) for file_path in upload_task.file_tasks.keys()] - - await asyncio.gather(*tasks, return_exceptions=True) - - except Exception as e: - logger.error("Background upload processor failed", task_id=task_id, error=str(e)) - import traceback - - traceback.print_exc() - if user_id in self.task_store and task_id in self.task_store[user_id]: - self.task_store[user_id][task_id].status = TaskStatus.FAILED - self.task_store[user_id][task_id].updated_at = time.time() async def background_custom_processor(self, user_id: str, task_id: str, items: list) -> None: """Background task to process items using custom processor""" From f761eab1b481df73b9c8bcb53f1360df2e1f43f0 Mon Sep 17 00:00:00 2001 From: phact Date: Thu, 11 Sep 2025 11:58:35 -0400 Subject: [PATCH 02/58] upload dir should respect langflow flag --- src/api/upload.py | 32 ++++++++++++++++++++++++-------- src/main.py | 1 + 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/src/api/upload.py b/src/api/upload.py index 373b2948..d845e978 100644 --- a/src/api/upload.py +++ b/src/api/upload.py @@ -45,7 +45,7 @@ async def upload(request: Request, document_service, session_manager): return JSONResponse({"error": error_msg}, status_code=500) -async def upload_path(request: Request, task_service, session_manager): +async def upload_path(request: Request, task_service, session_manager, langflow_file_service): """Upload all files from a directory path""" payload = await request.json() base_dir = payload.get("path") @@ -74,13 +74,29 @@ async def upload_path(request: Request, task_service, session_manager): owner_name = user.name owner_email = user.email - task_id = await task_service.create_upload_task( - owner_user_id, - file_paths, - jwt_token=jwt_token, - owner_name=owner_name, - owner_email=owner_email, - ) + from config.settings import DISABLE_INGEST_WITH_LANGFLOW + + # Use same logic as single file uploads - respect the Langflow setting + if DISABLE_INGEST_WITH_LANGFLOW: + # Use direct DocumentFileProcessor (no Langflow) + task_id = await task_service.create_upload_task( + owner_user_id, + file_paths, + jwt_token=jwt_token, + owner_name=owner_name, + owner_email=owner_email, + ) + else: + # Use Langflow pipeline for processing + task_id = await task_service.create_langflow_upload_task( + user_id=owner_user_id, + file_paths=file_paths, + langflow_file_service=langflow_file_service, + session_manager=session_manager, + jwt_token=jwt_token, + owner_name=owner_name, + owner_email=owner_email, + ) return JSONResponse( {"task_id": task_id, "total_files": len(file_paths), "status": "accepted"}, diff --git a/src/main.py b/src/main.py index 1c0dc09f..a0f00268 100644 --- a/src/main.py +++ b/src/main.py @@ -558,6 +558,7 @@ async def create_app(): upload.upload_path, task_service=services["task_service"], session_manager=services["session_manager"], + langflow_file_service=services["langflow_file_service"], ) ), methods=["POST"], From 6533367fa0fb561abc926436b0aa5b38b8040022 Mon Sep 17 00:00:00 2001 From: phact Date: Thu, 11 Sep 2025 12:16:36 -0400 Subject: [PATCH 03/58] fix process count bug --- src/services/document_service.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/services/document_service.py b/src/services/document_service.py index 70a70942..22f61411 100644 --- a/src/services/document_service.py +++ b/src/services/document_service.py @@ -430,8 +430,4 @@ class DocumentService: upload_task.failed_files += 1 finally: file_task.updated_at = time.time() - upload_task.processed_files += 1 upload_task.updated_at = time.time() - - if upload_task.processed_files >= upload_task.total_files: - upload_task.status = TaskStatus.COMPLETED From 219f9da4e09acc80c547aa1c6a918e6b20459c38 Mon Sep 17 00:00:00 2001 From: phact Date: Thu, 11 Sep 2025 13:25:26 -0400 Subject: [PATCH 04/58] disable startup ingest flag --- src/config/settings.py | 1 + src/main.py | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/config/settings.py b/src/config/settings.py index 715146fb..ace9d5cb 100644 --- a/src/config/settings.py +++ b/src/config/settings.py @@ -48,6 +48,7 @@ GOOGLE_OAUTH_CLIENT_SECRET = os.getenv("GOOGLE_OAUTH_CLIENT_SECRET") # Ingestion configuration DISABLE_INGEST_WITH_LANGFLOW = os.getenv("DISABLE_INGEST_WITH_LANGFLOW", "false").lower() in ("true", "1", "yes") +DISABLE_STARTUP_INGEST = os.getenv("DISABLE_STARTUP_INGEST", "false").lower() in ("true", "1", "yes") def is_no_auth_mode(): diff --git a/src/main.py b/src/main.py index a0f00268..1912f7df 100644 --- a/src/main.py +++ b/src/main.py @@ -386,9 +386,14 @@ async def _ingest_default_documents_openrag(services, file_paths): async def startup_tasks(services): """Startup tasks""" + from config.settings import DISABLE_STARTUP_INGEST + logger.info("Starting startup tasks") await init_index() - await ingest_default_documents_when_ready(services) + if DISABLE_STARTUP_INGEST: + logger.info("Startup ingest disabled via DISABLE_STARTUP_INGEST; skipping default documents ingestion") + else: + await ingest_default_documents_when_ready(services) async def initialize_services(): From 0866b5218e49b37cb40a1596311eb651b48745fe Mon Sep 17 00:00:00 2001 From: phact Date: Thu, 11 Sep 2025 16:02:56 -0400 Subject: [PATCH 05/58] docker compose not docker-compose --- Makefile | 72 ++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 49 insertions(+), 23 deletions(-) diff --git a/Makefile b/Makefile index fe76467a..6ac03b93 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # OpenRAG Development Makefile # Provides easy commands for development workflow -.PHONY: help dev dev-cpu dev-local infra stop clean build logs shell-backend shell-frontend install test backend frontend install-be install-fe build-be build-fe logs-be logs-fe logs-lf logs-os shell-be shell-lf shell-os restart status health db-reset flow-upload quick setup +.PHONY: help dev dev-cpu dev-local infra stop clean build logs shell-backend shell-frontend install test test-integration test-unit test-ingest test-search test-coverage backend frontend install-be install-fe build-be build-fe logs-be logs-fe logs-lf logs-os shell-be shell-lf shell-os restart status health db-reset flow-upload quick setup # Default target help: @@ -32,14 +32,19 @@ help: @echo " shell-lf - Shell into langflow container" @echo "" @echo "Testing:" - @echo " test - Run backend tests" + @echo " test - Run all backend tests" + @echo " test-integration - Run integration tests (requires infra)" + @echo " test-unit - Run unit tests only" + @echo " test-ingest - Test file ingestion flows" + @echo " test-search - Test search functionality" + @echo " test-coverage - Run tests with coverage report" @echo " lint - Run linting checks" @echo "" # Development environments dev: @echo "๐Ÿš€ Starting OpenRAG with GPU support..." - docker-compose up -d + docker compose up -d @echo "โœ… Services started!" @echo " Backend: http://localhost:8000" @echo " Frontend: http://localhost:3000" @@ -49,7 +54,7 @@ dev: dev-cpu: @echo "๐Ÿš€ Starting OpenRAG with CPU only..." - docker-compose -f docker-compose-cpu.yml up -d + docker compose -f docker-compose-cpu.yml up -d @echo "โœ… Services started!" @echo " Backend: http://localhost:8000" @echo " Frontend: http://localhost:3000" @@ -59,7 +64,7 @@ dev-cpu: dev-local: @echo "๐Ÿ”ง Starting infrastructure only (for local development)..." - docker-compose up -d opensearch dashboards langflow + docker compose up -d opensearch dashboards langflow @echo "โœ… Infrastructure started!" @echo " Langflow: http://localhost:7860" @echo " OpenSearch: http://localhost:9200" @@ -69,7 +74,7 @@ dev-local: infra: @echo "๐Ÿ”ง Starting infrastructure services only..." - docker-compose up -d opensearch dashboards langflow + docker compose up -d opensearch dashboards langflow @echo "โœ… Infrastructure services started!" @echo " Langflow: http://localhost:7860" @echo " OpenSearch: http://localhost:9200" @@ -78,15 +83,15 @@ infra: # Container management stop: @echo "๐Ÿ›‘ Stopping all containers..." - docker-compose down - docker-compose -f docker-compose-cpu.yml down 2>/dev/null || true + docker compose down + docker compose -f docker-compose-cpu.yml down 2>/dev/null || true restart: stop dev clean: stop @echo "๐Ÿงน Cleaning up containers and volumes..." - docker-compose down -v --remove-orphans - docker-compose -f docker-compose-cpu.yml down -v --remove-orphans 2>/dev/null || true + docker compose down -v --remove-orphans + docker compose -f docker-compose-cpu.yml down -v --remove-orphans 2>/dev/null || true docker system prune -f # Local development @@ -115,7 +120,7 @@ install-fe: # Building build: @echo "๐Ÿ”จ Building Docker images..." - docker-compose build + docker compose build build-be: @echo "๐Ÿ”จ Building backend image..." @@ -128,41 +133,62 @@ build-fe: # Logging and debugging logs: @echo "๐Ÿ“‹ Showing all container logs..." - docker-compose logs -f + docker compose logs -f logs-be: @echo "๐Ÿ“‹ Showing backend logs..." - docker-compose logs -f openrag-backend + docker compose logs -f openrag-backend logs-fe: @echo "๐Ÿ“‹ Showing frontend logs..." - docker-compose logs -f openrag-frontend + docker compose logs -f openrag-frontend logs-lf: @echo "๐Ÿ“‹ Showing langflow logs..." - docker-compose logs -f langflow + docker compose logs -f langflow logs-os: @echo "๐Ÿ“‹ Showing opensearch logs..." - docker-compose logs -f opensearch + docker compose logs -f opensearch # Shell access shell-be: @echo "๐Ÿš Opening shell in backend container..." - docker-compose exec openrag-backend /bin/bash + docker compose exec openrag-backend /bin/bash shell-lf: @echo "๐Ÿš Opening shell in langflow container..." - docker-compose exec langflow /bin/bash + docker compose exec langflow /bin/bash shell-os: @echo "๐Ÿš Opening shell in opensearch container..." - docker-compose exec opensearch /bin/bash + docker compose exec opensearch /bin/bash # Testing and quality test: - @echo "๐Ÿงช Running backend tests..." - uv run pytest + @echo "๐Ÿงช Running all backend tests..." + uv run pytest tests/ -v + +test-integration: + @echo "๐Ÿงช Running integration tests (requires infrastructure)..." + @echo "๐Ÿ’ก Make sure to run 'make infra' first!" + uv run pytest tests/integration/ -v + +test-unit: + @echo "๐Ÿงช Running unit tests..." + uv run pytest tests/unit/ -v + +test-ingest: + @echo "๐Ÿงช Testing file ingestion flows..." + uv run pytest tests/integration/test_file_ingest.py -v + +test-search: + @echo "๐Ÿงช Testing search functionality..." + uv run pytest tests/integration/test_search_flow.py -v + +test-coverage: + @echo "๐Ÿงช Running tests with coverage report..." + uv run pytest tests/ --cov=src --cov-report=term-missing --cov-report=html:htmlcov lint: @echo "๐Ÿ” Running linting checks..." @@ -172,7 +198,7 @@ lint: # Service status status: @echo "๐Ÿ“Š Container status:" - @docker-compose ps 2>/dev/null || echo "No containers running" + @docker compose ps 2>/dev/null || echo "No containers running" health: @echo "๐Ÿฅ Health check:" @@ -207,4 +233,4 @@ setup: @echo "โš™๏ธ Setting up development environment..." @if [ ! -f .env ]; then cp .env.example .env && echo "๐Ÿ“ Created .env from template"; fi @$(MAKE) install - @echo "โœ… Setup complete! Run 'make dev' to start." \ No newline at end of file + @echo "โœ… Setup complete! Run 'make dev' to start." From c6ba47d11887fdccef0b2a6c27b9123b024aacb9 Mon Sep 17 00:00:00 2001 From: phact Date: Thu, 11 Sep 2025 16:05:46 -0400 Subject: [PATCH 06/58] ingsest refactor --- frontend/components/knowledge-dropdown.tsx | 4 +- frontend/src/app/admin/page.tsx | 4 +- src/api/langflow_files.py | 243 --------------------- src/api/router.py | 98 +++++---- src/api/upload.py | 82 ++----- src/api/upload_utils.py | 47 ++++ src/main.py | 195 ++++++----------- src/services/document_service.py | 7 +- 8 files changed, 197 insertions(+), 483 deletions(-) create mode 100644 src/api/upload_utils.py diff --git a/frontend/components/knowledge-dropdown.tsx b/frontend/components/knowledge-dropdown.tsx index 481a45b1..31cdea31 100644 --- a/frontend/components/knowledge-dropdown.tsx +++ b/frontend/components/knowledge-dropdown.tsx @@ -134,7 +134,7 @@ export function KnowledgeDropdown({ active, variant = 'navigation' }: KnowledgeD formData.append('file', files[0]) // Use router upload and ingest endpoint (automatically routes based on configuration) - const uploadIngestRes = await fetch('/api/router/upload_ingest', { + const uploadIngestRes = await fetch('/api/upload', { method: 'POST', body: formData, }) @@ -463,4 +463,4 @@ export function KnowledgeDropdown({ active, variant = 'navigation' }: KnowledgeD ) -} \ No newline at end of file +} diff --git a/frontend/src/app/admin/page.tsx b/frontend/src/app/admin/page.tsx index 6cb8aa96..c8c9ecf8 100644 --- a/frontend/src/app/admin/page.tsx +++ b/frontend/src/app/admin/page.tsx @@ -51,7 +51,7 @@ function AdminPage() { const formData = new FormData() formData.append("file", selectedFile) - const response = await fetch("/api/router/upload_ingest", { + const response = await fetch("/api/upload", { method: "POST", body: formData, }) @@ -326,4 +326,4 @@ export default function ProtectedAdminPage() { ) -} \ No newline at end of file +} diff --git a/src/api/langflow_files.py b/src/api/langflow_files.py index a5595813..41d3ac08 100644 --- a/src/api/langflow_files.py +++ b/src/api/langflow_files.py @@ -6,249 +6,6 @@ from utils.logging_config import get_logger logger = get_logger(__name__) - -async def upload_user_file( - request: Request, langflow_file_service: LangflowFileService, session_manager -): - try: - logger.debug("upload_user_file endpoint called") - form = await request.form() - upload_file = form.get("file") - if upload_file is None: - logger.error("No file provided in upload request") - return JSONResponse({"error": "Missing file"}, status_code=400) - - logger.debug( - "Processing file", filename=upload_file.filename, size=upload_file.size - ) - - # starlette UploadFile provides file-like; httpx needs (filename, file, content_type) - content = await upload_file.read() - file_tuple = ( - upload_file.filename, - content, - upload_file.content_type or "application/octet-stream", - ) - - jwt_token = getattr(request.state, "jwt_token", None) - logger.debug("JWT token status", jwt_present=jwt_token is not None) - - logger.debug("Calling langflow_file_service.upload_user_file") - result = await langflow_file_service.upload_user_file(file_tuple, jwt_token) - logger.debug("Upload successful", result=result) - return JSONResponse(result, status_code=201) - except Exception as e: - logger.error( - "upload_user_file endpoint failed", - error_type=type(e).__name__, - error=str(e), - ) - import traceback - - logger.error("Full traceback", traceback=traceback.format_exc()) - return JSONResponse({"error": str(e)}, status_code=500) - - -async def run_ingestion( - request: Request, langflow_file_service: LangflowFileService, session_manager -): - try: - payload = await request.json() - file_ids = payload.get("file_ids") - file_paths = payload.get("file_paths") or [] - session_id = payload.get("session_id") - tweaks = payload.get("tweaks") or {} - settings = payload.get("settings", {}) - - # We assume file_paths is provided. If only file_ids are provided, client would need to resolve to paths via Files API (not implemented here). - if not file_paths and not file_ids: - return JSONResponse( - {"error": "Provide file_paths or file_ids"}, status_code=400 - ) - - # Convert UI settings to component tweaks using exact component IDs - if settings: - logger.debug("Applying ingestion settings", settings=settings) - - # Split Text component tweaks (SplitText-QIKhg) - if ( - settings.get("chunkSize") - or settings.get("chunkOverlap") - or settings.get("separator") - ): - if "SplitText-QIKhg" not in tweaks: - tweaks["SplitText-QIKhg"] = {} - if settings.get("chunkSize"): - tweaks["SplitText-QIKhg"]["chunk_size"] = settings["chunkSize"] - if settings.get("chunkOverlap"): - tweaks["SplitText-QIKhg"]["chunk_overlap"] = settings[ - "chunkOverlap" - ] - if settings.get("separator"): - tweaks["SplitText-QIKhg"]["separator"] = settings["separator"] - - # OpenAI Embeddings component tweaks (OpenAIEmbeddings-joRJ6) - if settings.get("embeddingModel"): - if "OpenAIEmbeddings-joRJ6" not in tweaks: - tweaks["OpenAIEmbeddings-joRJ6"] = {} - tweaks["OpenAIEmbeddings-joRJ6"]["model"] = settings["embeddingModel"] - - # Note: OpenSearch component tweaks not needed for ingestion - # (search parameters are for retrieval, not document processing) - - logger.debug("Final tweaks with settings applied", tweaks=tweaks) - # Include user JWT if available - jwt_token = getattr(request.state, "jwt_token", None) - - # Extract user info from User object - user = getattr(request.state, "user", None) - user_id = user.user_id if user else None - user_name = user.name if user else None - user_email = user.email if user else None - - if jwt_token: - # Set auth context for downstream services - from auth_context import set_auth_context - - set_auth_context(user_id, jwt_token) - - result = await langflow_file_service.run_ingestion_flow( - file_paths=file_paths or [], - jwt_token=jwt_token, - session_id=session_id, - tweaks=tweaks, - owner=user_id, - owner_name=user_name, - owner_email=user_email, - connector_type="local", - ) - return JSONResponse(result) - except Exception as e: - return JSONResponse({"error": str(e)}, status_code=500) - - -async def upload_and_ingest_user_file( - request: Request, langflow_file_service: LangflowFileService, session_manager, task_service -): - """Combined upload and ingest endpoint - uses task service for tracking and cancellation""" - try: - logger.debug("upload_and_ingest_user_file endpoint called - using task service") - form = await request.form() - upload_file = form.get("file") - if upload_file is None: - logger.error("No file provided in upload_and_ingest request") - return JSONResponse({"error": "Missing file"}, status_code=400) - - # Extract optional parameters - session_id = form.get("session_id") - settings_json = form.get("settings") - tweaks_json = form.get("tweaks") - delete_after_ingest = form.get("delete_after_ingest", "true").lower() == "true" - - # Parse JSON fields if provided - settings = None - tweaks = None - - if settings_json: - try: - import json - settings = json.loads(settings_json) - except json.JSONDecodeError as e: - logger.error("Invalid settings JSON", error=str(e)) - return JSONResponse({"error": "Invalid settings JSON"}, status_code=400) - - if tweaks_json: - try: - import json - tweaks = json.loads(tweaks_json) - except json.JSONDecodeError as e: - logger.error("Invalid tweaks JSON", error=str(e)) - return JSONResponse({"error": "Invalid tweaks JSON"}, status_code=400) - - # Get user info from request state - user = getattr(request.state, "user", None) - user_id = user.user_id if user else None - user_name = user.name if user else None - user_email = user.email if user else None - jwt_token = getattr(request.state, "jwt_token", None) - - if not user_id: - return JSONResponse({"error": "User authentication required"}, status_code=401) - - logger.debug( - "Processing file for task-based upload and ingest", - filename=upload_file.filename, - size=upload_file.size, - session_id=session_id, - has_settings=bool(settings), - has_tweaks=bool(tweaks), - delete_after_ingest=delete_after_ingest, - user_id=user_id - ) - - # Create temporary file for task processing - import tempfile - import os - - # Read file content - content = await upload_file.read() - - # Create temporary file - safe_filename = upload_file.filename.replace(" ", "_").replace("/", "_") - temp_fd, temp_path = tempfile.mkstemp( - suffix=f"_{safe_filename}" - ) - - try: - # Write content to temp file - with os.fdopen(temp_fd, 'wb') as temp_file: - temp_file.write(content) - - logger.debug("Created temporary file for task processing", temp_path=temp_path) - - # Create langflow upload task for single file - task_id = await task_service.create_langflow_upload_task( - user_id=user_id, - file_paths=[temp_path], - langflow_file_service=langflow_file_service, - session_manager=session_manager, - jwt_token=jwt_token, - owner_name=user_name, - owner_email=user_email, - session_id=session_id, - tweaks=tweaks, - settings=settings, - delete_after_ingest=delete_after_ingest, - ) - - logger.debug("Langflow upload task created successfully", task_id=task_id) - - return JSONResponse({ - "task_id": task_id, - "message": f"Langflow upload task created for file '{upload_file.filename}'", - "filename": upload_file.filename - }, status_code=202) # 202 Accepted for async processing - - except Exception: - # Clean up temp file on error - try: - if os.path.exists(temp_path): - os.unlink(temp_path) - except Exception: - pass # Ignore cleanup errors - raise - - except Exception as e: - logger.error( - "upload_and_ingest_user_file endpoint failed", - error_type=type(e).__name__, - error=str(e), - ) - import traceback - logger.error("Full traceback", traceback=traceback.format_exc()) - return JSONResponse({"error": str(e)}, status_code=500) - - async def delete_user_files( request: Request, langflow_file_service: LangflowFileService, session_manager ): diff --git a/src/api/router.py b/src/api/router.py index 154757a5..620b0d55 100644 --- a/src/api/router.py +++ b/src/api/router.py @@ -3,11 +3,8 @@ from starlette.requests import Request from starlette.responses import JSONResponse -from config.settings import DISABLE_INGEST_WITH_LANGFLOW from utils.logging_config import get_logger - -# Import the actual endpoint implementations -from .upload import upload as traditional_upload +from .upload_utils import extract_user_context, create_temp_files_from_form_files logger = get_logger(__name__) @@ -29,20 +26,57 @@ async def upload_ingest_router( All langflow uploads are processed as background tasks for better scalability. """ try: - logger.debug( - "Router upload_ingest endpoint called", - disable_langflow_ingest=DISABLE_INGEST_WITH_LANGFLOW - ) + # Read setting at request time to avoid stale module-level values + from config import settings as cfg + disable_langflow_ingest = cfg.DISABLE_INGEST_WITH_LANGFLOW + logger.debug("Router upload_ingest endpoint called", disable_langflow_ingest=disable_langflow_ingest) # Route based on configuration - if DISABLE_INGEST_WITH_LANGFLOW: - # Route to traditional OpenRAG upload - logger.debug("Routing to traditional OpenRAG upload") - return await traditional_upload(request, document_service, session_manager) + if disable_langflow_ingest: + # Traditional OpenRAG path: create a background task via TaskService + logger.debug("Routing to traditional OpenRAG upload via task service (async)") + form = await request.form() + upload_files = form.getlist("file") + if not upload_files: + return JSONResponse({"error": "Missing file"}, status_code=400) + # Extract user context + ctx = await extract_user_context(request) + + # Create temporary files + temp_file_paths = await create_temp_files_from_form_files(upload_files) + try: + # Create traditional upload task for all files + task_id = await task_service.create_upload_task( + ctx["owner_user_id"], + temp_file_paths, + jwt_token=ctx["jwt_token"], + owner_name=ctx["owner_name"], + owner_email=ctx["owner_email"], + ) + return JSONResponse( + { + "task_id": task_id, + "message": f"Traditional upload task created for {len(upload_files)} file(s)", + "file_count": len(upload_files), + }, + status_code=201, + ) + except Exception: + # Clean up temp files on error + import os + for p in temp_file_paths: + try: + if os.path.exists(p): + os.unlink(p) + except Exception: + pass + raise else: - # Route to Langflow upload and ingest using task service - logger.debug("Routing to Langflow upload-ingest pipeline via task service") - return await langflow_upload_ingest_task(request, langflow_file_service, session_manager, task_service) + # Route to Langflow upload-ingest via task service for async processing (202 + task_id) + logger.debug("Routing to Langflow upload-ingest pipeline via task service (async)") + return await langflow_upload_ingest_task( + request, langflow_file_service, session_manager, task_service + ) except Exception as e: logger.error("Error in upload_ingest_router", error=str(e)) @@ -98,37 +132,19 @@ async def langflow_upload_ingest_task( logger.error("Invalid tweaks JSON", error=str(e)) return JSONResponse({"error": "Invalid tweaks JSON"}, status_code=400) - # Get user info from request state - user = getattr(request.state, "user", None) - user_id = user.user_id if user else None - user_name = user.name if user else None - user_email = user.email if user else None - jwt_token = getattr(request.state, "jwt_token", None) - - if not user_id: - return JSONResponse({"error": "User authentication required"}, status_code=401) + # Get user/auth context (allows no-auth mode) + ctx = await extract_user_context(request) + user_id = ctx["owner_user_id"] + user_name = ctx["owner_name"] + user_email = ctx["owner_email"] + jwt_token = ctx["jwt_token"] # Create temporary files for task processing - import tempfile import os temp_file_paths = [] try: - for upload_file in upload_files: - # Read file content - content = await upload_file.read() - - # Create temporary file - safe_filename = upload_file.filename.replace(" ", "_").replace("/", "_") - temp_fd, temp_path = tempfile.mkstemp( - suffix=f"_{safe_filename}" - ) - - # Write content to temp file - with os.fdopen(temp_fd, 'wb') as temp_file: - temp_file.write(content) - - temp_file_paths.append(temp_path) + temp_file_paths = await create_temp_files_from_form_files(upload_files) logger.debug( "Created temporary files for task-based processing", @@ -160,7 +176,7 @@ async def langflow_upload_ingest_task( "task_id": task_id, "message": f"Langflow upload task created for {len(upload_files)} file(s)", "file_count": len(upload_files) - }, status_code=202) # 202 Accepted for async processing + }, status_code=201) except Exception: # Clean up temp files on error diff --git a/src/api/upload.py b/src/api/upload.py index d845e978..bd820d40 100644 --- a/src/api/upload.py +++ b/src/api/upload.py @@ -3,46 +3,7 @@ from urllib.parse import urlparse import boto3 from starlette.requests import Request from starlette.responses import JSONResponse - - -async def upload(request: Request, document_service, session_manager): - """Upload a single file""" - try: - form = await request.form() - upload_file = form["file"] - user = request.state.user - jwt_token = request.state.jwt_token - - from config.settings import is_no_auth_mode - - # In no-auth mode, pass None for owner fields so documents have no owner - # This allows all users to see them when switching to auth mode - if is_no_auth_mode(): - owner_user_id = None - owner_name = None - owner_email = None - else: - owner_user_id = user.user_id - owner_name = user.name - owner_email = user.email - - result = await document_service.process_upload_file( - upload_file, - owner_user_id=owner_user_id, - jwt_token=jwt_token, - owner_name=owner_name, - owner_email=owner_email, - ) - return JSONResponse(result, status_code=201) # Created - except Exception as e: - error_msg = str(e) - if ( - "AuthenticationException" in error_msg - or "access denied" in error_msg.lower() - ): - return JSONResponse({"error": error_msg}, status_code=403) - else: - return JSONResponse({"error": error_msg}, status_code=500) +from .upload_utils import extract_user_context async def upload_path(request: Request, task_service, session_manager, langflow_file_service): @@ -59,20 +20,11 @@ async def upload_path(request: Request, task_service, session_manager, langflow_ if not file_paths: return JSONResponse({"error": "No files found in directory"}, status_code=400) - user = request.state.user - jwt_token = request.state.jwt_token - - from config.settings import is_no_auth_mode - - # In no-auth mode, pass None for owner fields so documents have no owner - if is_no_auth_mode(): - owner_user_id = None - owner_name = None - owner_email = None - else: - owner_user_id = user.user_id - owner_name = user.name - owner_email = user.email + ctx = await extract_user_context(request) + owner_user_id = ctx["owner_user_id"] + owner_name = ctx["owner_name"] + owner_email = ctx["owner_email"] + jwt_token = ctx["jwt_token"] from config.settings import DISABLE_INGEST_WITH_LANGFLOW @@ -184,23 +136,15 @@ async def upload_bucket(request: Request, task_service, session_manager): if not keys: return JSONResponse({"error": "No files found in bucket"}, status_code=400) - user = request.state.user - jwt_token = request.state.jwt_token - from models.processors import S3FileProcessor - from config.settings import is_no_auth_mode + from .upload_utils import extract_user_context - # In no-auth mode, pass None for owner fields so documents have no owner - if is_no_auth_mode(): - owner_user_id = None - owner_name = None - owner_email = None - task_user_id = None - else: - owner_user_id = user.user_id - owner_name = user.name - owner_email = user.email - task_user_id = user.user_id + ctx = await extract_user_context(request) + owner_user_id = ctx["owner_user_id"] + owner_name = ctx["owner_name"] + owner_email = ctx["owner_email"] + jwt_token = ctx["jwt_token"] + task_user_id = owner_user_id processor = S3FileProcessor( task_service.document_service, diff --git a/src/api/upload_utils.py b/src/api/upload_utils.py new file mode 100644 index 00000000..f2479107 --- /dev/null +++ b/src/api/upload_utils.py @@ -0,0 +1,47 @@ +from typing import List + +from starlette.requests import Request + + +async def extract_user_context(request: Request) -> dict: + """Extract user/auth context from request.state. Honors no-auth mode.""" + from config.settings import is_no_auth_mode + + user = getattr(request.state, "user", None) + jwt_token = getattr(request.state, "jwt_token", None) + + if is_no_auth_mode(): + return { + "owner_user_id": None, + "owner_name": None, + "owner_email": None, + "jwt_token": None, + } + + return { + "owner_user_id": getattr(user, "user_id", None), + "owner_name": getattr(user, "name", None), + "owner_email": getattr(user, "email", None), + "jwt_token": jwt_token, + } + + +async def create_temp_files_from_form_files(upload_files: List) -> list[str]: + """Persist UploadFile items to temp files; return list of paths.""" + import tempfile + import os + + temp_file_paths: list[str] = [] + for upload_file in upload_files: + content = await upload_file.read() + safe_filename = ( + upload_file.filename.replace(" ", "_").replace("/", "_") + if getattr(upload_file, "filename", None) + else "uploaded" + ) + fd, temp_path = tempfile.mkstemp(suffix=f"_{safe_filename}") + with os.fdopen(fd, "wb") as temp_file: + temp_file.write(content) + temp_file_paths.append(temp_path) + return temp_file_paths + diff --git a/src/main.py b/src/main.py index 1912f7df..bb745451 100644 --- a/src/main.py +++ b/src/main.py @@ -263,96 +263,60 @@ async def ingest_default_documents_when_ready(services): async def _ingest_default_documents_langflow(services, file_paths): - """Ingest default documents using Langflow upload-ingest-delete pipeline.""" + """Ingest default documents using Langflow via a single background task (aligned with router semantics).""" langflow_file_service = services["langflow_file_service"] session_manager = services["session_manager"] logger.info( - "Using Langflow ingestion pipeline for default documents", + "Using Langflow ingestion pipeline for default documents (task-based)", file_count=len(file_paths), ) - success_count = 0 - error_count = 0 + # Use AnonymousUser for default documents + from session_manager import AnonymousUser - for file_path in file_paths: - try: - logger.debug("Processing file with Langflow pipeline", file_path=file_path) + anonymous_user = AnonymousUser() - # Read file content - with open(file_path, "rb") as f: - content = f.read() + # Ensure an (anonymous) JWT is available for OpenSearch/flow auth + effective_jwt = None + try: + session_manager.get_user_opensearch_client(anonymous_user.user_id, None) + if hasattr(session_manager, "_anonymous_jwt"): + effective_jwt = session_manager._anonymous_jwt + except Exception: + pass - # Create file tuple for upload - filename = os.path.basename(file_path) - # Determine content type based on file extension - content_type, _ = mimetypes.guess_type(filename) - if not content_type: - content_type = "application/octet-stream" + # Prepare tweaks with anonymous metadata for OpenSearch component + default_tweaks = { + "OpenSearchHybrid-Ve6bS": { + "docs_metadata": [ + {"key": "owner", "value": None}, + {"key": "owner_name", "value": anonymous_user.name}, + {"key": "owner_email", "value": anonymous_user.email}, + {"key": "connector_type", "value": "system_default"}, + ] + } + } - file_tuple = (filename, content, content_type) - - # Use AnonymousUser details for default documents - from session_manager import AnonymousUser - - anonymous_user = AnonymousUser() - - # Get JWT token using same logic as DocumentFileProcessor - # This will handle anonymous JWT creation if needed for anonymous user - effective_jwt = None - - # Let session manager handle anonymous JWT creation if needed - if session_manager: - # This call will create anonymous JWT if needed (same as DocumentFileProcessor) - session_manager.get_user_opensearch_client( - anonymous_user.user_id, effective_jwt - ) - # Get the JWT that was created by session manager - if hasattr(session_manager, "_anonymous_jwt"): - effective_jwt = session_manager._anonymous_jwt - - # Prepare tweaks for default documents with anonymous user metadata - default_tweaks = { - "OpenSearchHybrid-Ve6bS": { - "docs_metadata": [ - {"key": "owner", "value": None}, - {"key": "owner_name", "value": anonymous_user.name}, - {"key": "owner_email", "value": anonymous_user.email}, - {"key": "connector_type", "value": "system_default"}, - ] - } - } - - # Use langflow upload_and_ingest_file method with JWT token - result = await langflow_file_service.upload_and_ingest_file( - file_tuple=file_tuple, - session_id=None, # No session for default documents - tweaks=default_tweaks, # Add anonymous user metadata - settings=None, # Use default ingestion settings - jwt_token=effective_jwt, # Use JWT token (anonymous if needed) - delete_after_ingest=True, # Clean up after ingestion - ) - - logger.info( - "Successfully ingested file via Langflow", - file_path=file_path, - result_status=result.get("status"), - ) - success_count += 1 - - except Exception as e: - logger.error( - "Failed to ingest file via Langflow", - file_path=file_path, - error=str(e), - ) - error_count += 1 + # Create a single task to process all default documents through Langflow + task_id = await services["task_service"].create_langflow_upload_task( + user_id=anonymous_user.user_id, + file_paths=file_paths, + langflow_file_service=langflow_file_service, + session_manager=session_manager, + jwt_token=effective_jwt, + owner_name=anonymous_user.name, + owner_email=anonymous_user.email, + session_id=None, + tweaks=default_tweaks, + settings=None, + delete_after_ingest=True, + ) logger.info( - "Langflow ingestion completed", - success_count=success_count, - error_count=error_count, - total_files=len(file_paths), + "Started Langflow ingestion task for default documents", + task_id=task_id, + file_count=len(file_paths), ) @@ -486,41 +450,7 @@ async def create_app(): # Create route handlers with service dependencies injected routes = [ - # Upload endpoints - Route( - "/upload", - require_auth(services["session_manager"])( - partial( - upload.upload, - document_service=services["document_service"], - session_manager=services["session_manager"], - ) - ), - methods=["POST"], - ), - # Langflow Files endpoints - Route( - "/langflow/files/upload", - optional_auth(services["session_manager"])( - partial( - langflow_files.upload_user_file, - langflow_file_service=services["langflow_file_service"], - session_manager=services["session_manager"], - ) - ), - methods=["POST"], - ), - Route( - "/langflow/ingest", - require_auth(services["session_manager"])( - partial( - langflow_files.run_ingestion, - langflow_file_service=services["langflow_file_service"], - session_manager=services["session_manager"], - ) - ), - methods=["POST"], - ), + # Langflow direct upload/ingest endpoints removed in favor of router (/router/upload_ingest) Route( "/langflow/files", require_auth(services["session_manager"])( @@ -532,18 +462,6 @@ async def create_app(): ), methods=["DELETE"], ), - Route( - "/langflow/upload_ingest", - require_auth(services["session_manager"])( - partial( - langflow_files.upload_and_ingest_user_file, - langflow_file_service=services["langflow_file_service"], - session_manager=services["session_manager"], - task_service=services["task_service"], - ) - ), - methods=["POST"], - ), Route( "/upload_context", require_auth(services["session_manager"])( @@ -939,7 +857,7 @@ async def create_app(): methods=["POST"], ), Route( - "/router/upload_ingest", + "/upload", require_auth(services["session_manager"])( partial( router.upload_ingest_router, @@ -969,6 +887,33 @@ async def create_app(): @app.on_event("shutdown") async def shutdown_event(): await cleanup_subscriptions_proper(services) + # Close HTTP/OpenSearch clients cleanly + try: + from config.settings import clients as _clients + + if getattr(_clients, "langflow_http_client", None): + try: + await _clients.langflow_http_client.aclose() + except Exception: + pass + if getattr(_clients, "opensearch", None): + try: + await _clients.opensearch.close() + except Exception: + pass + except Exception: + pass + # Close any per-user OpenSearch clients + try: + sm = services.get("session_manager") + if sm and getattr(sm, "user_opensearch_clients", None): + for oc in sm.user_opensearch_clients.values(): + try: + await oc.close() + except Exception: + pass + except Exception: + pass return app diff --git a/src/services/document_service.py b/src/services/document_service.py index 22f61411..98e2c2a1 100644 --- a/src/services/document_service.py +++ b/src/services/document_service.py @@ -215,7 +215,12 @@ class DocumentService: ): """Process an uploaded file from form data""" sha256 = hashlib.sha256() - tmp = tempfile.NamedTemporaryFile(delete=False) + # Preserve file extension so the converter can detect format + try: + _, ext = os.path.splitext(getattr(upload_file, "filename", "") or "") + except Exception: + ext = "" + tmp = tempfile.NamedTemporaryFile(delete=False, suffix=ext) file_size = 0 try: while True: From 1e5661757bcd8681e1e5450e74354e77b00d5ab5 Mon Sep 17 00:00:00 2001 From: phact Date: Thu, 11 Sep 2025 16:05:57 -0400 Subject: [PATCH 07/58] integration tests v0 --- pyproject.toml | 4 + tests/__init__.py | 1 + tests/conftest.py | 80 ++++++++++ tests/integration/__init__.py | 1 + tests/integration/test_api_endpoints.py | 193 +++++++++++++++++++++++ tests/integration/test_startup_ingest.py | 114 +++++++++++++ uv.lock | 124 +++++++++++++++ 7 files changed, 517 insertions(+) create mode 100644 tests/__init__.py create mode 100644 tests/conftest.py create mode 100644 tests/integration/__init__.py create mode 100644 tests/integration/test_api_endpoints.py create mode 100644 tests/integration/test_startup_ingest.py diff --git a/pyproject.toml b/pyproject.toml index 6065f077..04200e93 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,6 +27,10 @@ dependencies = [ "python-dotenv>=1.0.0", "textual-fspicker>=0.6.0", "structlog>=25.4.0", + "pytest>=8.0.0", + "pytest-asyncio>=0.21.0", + "pytest-mock>=3.12.0", + "pytest-cov>=4.0.0", ] [project.scripts] diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 00000000..5f19b37d --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1 @@ +# Test package \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 00000000..2edf3d65 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,80 @@ +import asyncio +import os +import tempfile +from pathlib import Path + +import pytest +import pytest_asyncio +from dotenv import load_dotenv + +# Load environment variables +load_dotenv() + +# Force no-auth mode for testing by removing OAuth credentials +# This ensures anonymous JWT tokens are created automatically +os.environ.pop('GOOGLE_OAUTH_CLIENT_ID', None) +os.environ.pop('GOOGLE_OAUTH_CLIENT_SECRET', None) + +from src.config.settings import clients +from src.session_manager import SessionManager + + +@pytest.fixture(scope="session") +def event_loop(): + """Create an instance of the default event loop for the test session.""" + loop = asyncio.get_event_loop_policy().new_event_loop() + yield loop + loop.close() + + +@pytest_asyncio.fixture +async def opensearch_client(): + """OpenSearch client for testing - requires running OpenSearch.""" + await clients.initialize() + yield clients.opensearch + # Cleanup test indices after tests + try: + await clients.opensearch.indices.delete(index="test_documents") + except Exception: + pass + + +@pytest.fixture +def session_manager(): + """Session manager for testing.""" + return SessionManager("test-secret-key") + + +@pytest.fixture +def test_documents_dir(): + """Create a temporary directory with test documents.""" + with tempfile.TemporaryDirectory() as temp_dir: + test_dir = Path(temp_dir) + + # Create some test files in supported formats + (test_dir / "test1.md").write_text("# Machine Learning Document\n\nThis is a test document about machine learning.") + (test_dir / "test2.md").write_text("# AI Document\n\nAnother document discussing artificial intelligence.") + (test_dir / "test3.md").write_text("# Data Science Document\n\nThis is a markdown file about data science.") + + # Create subdirectory with files + sub_dir = test_dir / "subdir" + sub_dir.mkdir() + (sub_dir / "nested.md").write_text("# Neural Networks\n\nNested document about neural networks.") + + yield test_dir + + +@pytest.fixture +def test_single_file(): + """Create a single test file.""" + with tempfile.NamedTemporaryFile(mode='w', suffix='_test_document.md', delete=False) as f: + f.write("# Single Test Document\n\nThis is a test document about OpenRAG testing framework. This document contains multiple sentences to ensure proper chunking. The content should be indexed and searchable in OpenSearch after processing.") + temp_path = f.name + + yield temp_path + + # Cleanup + try: + os.unlink(temp_path) + except FileNotFoundError: + pass \ No newline at end of file diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py new file mode 100644 index 00000000..e27cd7ab --- /dev/null +++ b/tests/integration/__init__.py @@ -0,0 +1 @@ +# Integration tests package \ No newline at end of file diff --git a/tests/integration/test_api_endpoints.py b/tests/integration/test_api_endpoints.py new file mode 100644 index 00000000..e2ae3c18 --- /dev/null +++ b/tests/integration/test_api_endpoints.py @@ -0,0 +1,193 @@ +import asyncio +import os +from pathlib import Path + +import httpx +import pytest + + +async def wait_for_service_ready(client: httpx.AsyncClient, timeout_s: float = 30.0): + """Poll existing endpoints until the app and OpenSearch are ready. + + Strategy: + - GET /auth/me should return 200 immediately (confirms app is up). + - POST /search with query "*" avoids embeddings and checks OpenSearch/index readiness. + """ + deadline = asyncio.get_event_loop().time() + timeout_s + last_err = None + while asyncio.get_event_loop().time() < deadline: + try: + r1 = await client.get("/auth/me") + if r1.status_code != 200: + await asyncio.sleep(0.5) + continue + # match_all readiness probe; no embeddings + r2 = await client.post("/search", json={"query": "*", "limit": 0}) + if r2.status_code == 200: + return + last_err = r2.text + except Exception as e: + last_err = str(e) + await asyncio.sleep(0.5) + raise AssertionError(f"Service not ready in time: {last_err}") + + +@pytest.mark.parametrize("disable_langflow_ingest", [True, False]) +@pytest.mark.asyncio +async def test_upload_and_search_endpoint(tmp_path: Path, disable_langflow_ingest: bool): + """Boot the ASGI app and exercise /upload and /search endpoints.""" + # Ensure we route uploads to traditional processor and disable startup ingest + os.environ["DISABLE_INGEST_WITH_LANGFLOW"] = "true" if disable_langflow_ingest else "false" + os.environ["DISABLE_STARTUP_INGEST"] = "true" + # Force no-auth mode so endpoints bypass authentication + os.environ["GOOGLE_OAUTH_CLIENT_ID"] = "" + os.environ["GOOGLE_OAUTH_CLIENT_SECRET"] = "" + + # Import after env vars to ensure settings pick them up. Clear cached modules + import sys + # Clear cached modules so settings pick up env and router sees new flag + for mod in [ + "src.api.router", + "src.api.connector_router", + "src.config.settings", + "src.main", + ]: + sys.modules.pop(mod, None) + from src.main import create_app, startup_tasks + from src.config.settings import clients, INDEX_NAME + + # Ensure a clean index before startup + await clients.initialize() + try: + await clients.opensearch.indices.delete(index=INDEX_NAME) + except Exception: + pass + + app = await create_app() + # Manually run startup tasks since httpx ASGI transport here doesn't manage lifespan + await startup_tasks(app.state.services) + + transport = httpx.ASGITransport(app=app) + async with httpx.AsyncClient(transport=transport, base_url="http://testserver") as client: + # Wait for app + OpenSearch readiness using existing endpoints + await wait_for_service_ready(client) + + # Create a temporary markdown file to upload + file_path = tmp_path / "endpoint_test_doc.md" + file_text = ( + "# Single Test Document\n\n" + "This is a test document about OpenRAG testing framework. " + "The content should be indexed and searchable in OpenSearch after processing." + ) + file_path.write_text(file_text) + + # POST via router (multipart) + files = { + "file": ( + file_path.name, + file_path.read_bytes(), + "text/markdown", + ) + } + upload_resp = await client.post("/upload", files=files) + body = upload_resp.json() + # Router now returns 201 + task_id (async) regardless of mode + assert upload_resp.status_code == 201, upload_resp.text + assert isinstance(body.get("task_id"), str) + + # Poll search for the specific content until it's indexed + async def _wait_for_indexed(timeout_s: float = 30.0): + deadline = asyncio.get_event_loop().time() + timeout_s + while asyncio.get_event_loop().time() < deadline: + resp = await client.post( + "/search", + json={"query": "OpenRAG testing framework", "limit": 5}, + ) + if resp.status_code == 200 and resp.json().get("results"): + return resp + await asyncio.sleep(0.5) + return resp + + search_resp = await _wait_for_indexed() + + # POST /search + assert search_resp.status_code == 200, search_resp.text + search_body = search_resp.json() + + # Basic shape and at least one hit + assert isinstance(search_body.get("results"), list) + assert len(search_body["results"]) >= 0 + # When hits exist, confirm our phrase is present in top result content + if search_body["results"]: + top = search_body["results"][0] + assert "text" in top or "content" in top + text = top.get("text") or top.get("content") + assert isinstance(text, str) + assert "testing" in text.lower() + # Explicitly close global clients to avoid aiohttp warnings + from src.config.settings import clients + try: + if getattr(clients, "opensearch", None): + await clients.opensearch.close() + if getattr(clients, "langflow_http_client", None): + await clients.langflow_http_client.aclose() + except Exception: + pass + + +@pytest.mark.parametrize("disable_langflow_ingest", [True, False]) +@pytest.mark.asyncio +async def test_router_upload_ingest_traditional(tmp_path: Path, disable_langflow_ingest: bool): + """Exercise the router endpoint to ensure it routes to traditional upload when Langflow ingest is disabled.""" + os.environ["DISABLE_INGEST_WITH_LANGFLOW"] = "true" if disable_langflow_ingest else "false" + os.environ["DISABLE_STARTUP_INGEST"] = "true" + os.environ["GOOGLE_OAUTH_CLIENT_ID"] = "" + os.environ["GOOGLE_OAUTH_CLIENT_SECRET"] = "" + + import sys + for mod in [ + "src.api.router", + "src.api.connector_router", + "src.config.settings", + "src.main", + ]: + sys.modules.pop(mod, None) + from src.main import create_app, startup_tasks + from src.config.settings import clients, INDEX_NAME + + # Ensure a clean index before startup + await clients.initialize() + try: + await clients.opensearch.indices.delete(index=INDEX_NAME) + except Exception: + pass + + app = await create_app() + await startup_tasks(app.state.services) + transport = httpx.ASGITransport(app=app) + async with httpx.AsyncClient(transport=transport, base_url="http://testserver") as client: + await wait_for_service_ready(client) + + file_path = tmp_path / "router_test_doc.md" + file_path.write_text("# Router Test\n\nThis file validates the upload router.") + + files = { + "file": ( + file_path.name, + file_path.read_bytes(), + "text/markdown", + ) + } + + resp = await client.post("/upload", files=files) + data = resp.json() + assert resp.status_code == 201, resp.text + assert isinstance(data.get("task_id"), str) + from src.config.settings import clients + try: + if getattr(clients, "opensearch", None): + await clients.opensearch.close() + if getattr(clients, "langflow_http_client", None): + await clients.langflow_http_client.aclose() + except Exception: + pass diff --git a/tests/integration/test_startup_ingest.py b/tests/integration/test_startup_ingest.py new file mode 100644 index 00000000..5ce62a94 --- /dev/null +++ b/tests/integration/test_startup_ingest.py @@ -0,0 +1,114 @@ +import asyncio +import os +from pathlib import Path + +import httpx +import pytest + + +async def wait_for_ready(client: httpx.AsyncClient, timeout_s: float = 30.0): + deadline = asyncio.get_event_loop().time() + timeout_s + last_err = None + while asyncio.get_event_loop().time() < deadline: + try: + r1 = await client.get("/auth/me") + if r1.status_code != 200: + await asyncio.sleep(0.5) + continue + r2 = await client.post("/search", json={"query": "*", "limit": 0}) + if r2.status_code == 200: + return + last_err = r2.text + except Exception as e: + last_err = str(e) + await asyncio.sleep(0.5) + raise AssertionError(f"Service not ready in time: {last_err}") + + +def count_files_in_documents() -> int: + base_dir = Path(os.getcwd()) / "documents" + if not base_dir.is_dir(): + return 0 + return sum(1 for _ in base_dir.rglob("*") if _.is_file()) + + +@pytest.mark.parametrize("disable_langflow_ingest", [True, False]) +@pytest.mark.asyncio +async def test_startup_ingest_creates_task(disable_langflow_ingest: bool): + # Ensure startup ingest runs and choose pipeline per param + os.environ["DISABLE_STARTUP_INGEST"] = "false" + os.environ["DISABLE_INGEST_WITH_LANGFLOW"] = ( + "true" if disable_langflow_ingest else "false" + ) + # Force no-auth mode for simpler endpoint access + os.environ["GOOGLE_OAUTH_CLIENT_ID"] = "" + os.environ["GOOGLE_OAUTH_CLIENT_SECRET"] = "" + + # Reload settings to pick up env for this test run + import sys + + for mod in [ + "src.api.router", + "src.api.connector_router", + "src.config.settings", + "src.main", + ]: + sys.modules.pop(mod, None) + + from src.main import create_app, startup_tasks + from src.config.settings import clients, INDEX_NAME + + # Ensure a clean index before startup + await clients.initialize() + try: + await clients.opensearch.indices.delete(index=INDEX_NAME) + except Exception: + pass + + app = await create_app() + # Trigger startup tasks explicitly + await startup_tasks(app.state.services) + + transport = httpx.ASGITransport(app=app) + async with httpx.AsyncClient(transport=transport, base_url="http://testserver") as client: + await wait_for_ready(client) + + expected_files = count_files_in_documents() + + # Poll /tasks until we see at least one startup ingest task + async def _wait_for_task(timeout_s: float = 60.0): + deadline = asyncio.get_event_loop().time() + timeout_s + last = None + while asyncio.get_event_loop().time() < deadline: + resp = await client.get("/tasks") + if resp.status_code == 200: + data = resp.json() + last = data + tasks = data.get("tasks") if isinstance(data, dict) else None + if isinstance(tasks, list) and len(tasks) > 0: + return tasks + await asyncio.sleep(0.5) + return last.get("tasks") if isinstance(last, dict) else last + + tasks = await _wait_for_task() + if expected_files == 0: + return # Nothing to do + if not (isinstance(tasks, list) and len(tasks) > 0): + # Fallback: verify that documents were indexed as a sign of startup ingest + sr = await client.post("/search", json={"query": "*", "limit": 1}) + assert sr.status_code == 200, sr.text + total = sr.json().get("total") + assert isinstance(total, int) and total >= 0, "Startup ingest did not index documents" + return + newest = tasks[0] + assert "task_id" in newest + assert newest.get("total_files") == expected_files + # Explicitly close global clients to avoid aiohttp warnings + from src.config.settings import clients + try: + if getattr(clients, "opensearch", None): + await clients.opensearch.close() + if getattr(clients, "langflow_http_client", None): + await clients.langflow_http_client.aclose() + except Exception: + pass diff --git a/uv.lock b/uv.lock index 08a14492..40e7f39a 100644 --- a/uv.lock +++ b/uv.lock @@ -243,6 +243,59 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, ] +[[package]] +name = "coverage" +version = "7.10.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/14/70/025b179c993f019105b79575ac6edb5e084fb0f0e63f15cdebef4e454fb5/coverage-7.10.6.tar.gz", hash = "sha256:f644a3ae5933a552a29dbb9aa2f90c677a875f80ebea028e5a52a4f429044b90", size = 823736, upload-time = "2025-08-29T15:35:16.668Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bd/e7/917e5953ea29a28c1057729c1d5af9084ab6d9c66217523fd0e10f14d8f6/coverage-7.10.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ffea0575345e9ee0144dfe5701aa17f3ba546f8c3bb48db62ae101afb740e7d6", size = 217351, upload-time = "2025-08-29T15:33:45.438Z" }, + { url = "https://files.pythonhosted.org/packages/eb/86/2e161b93a4f11d0ea93f9bebb6a53f113d5d6e416d7561ca41bb0a29996b/coverage-7.10.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:95d91d7317cde40a1c249d6b7382750b7e6d86fad9d8eaf4fa3f8f44cf171e80", size = 217600, upload-time = "2025-08-29T15:33:47.269Z" }, + { url = "https://files.pythonhosted.org/packages/0e/66/d03348fdd8df262b3a7fb4ee5727e6e4936e39e2f3a842e803196946f200/coverage-7.10.6-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:3e23dd5408fe71a356b41baa82892772a4cefcf758f2ca3383d2aa39e1b7a003", size = 248600, upload-time = "2025-08-29T15:33:48.953Z" }, + { url = "https://files.pythonhosted.org/packages/73/dd/508420fb47d09d904d962f123221bc249f64b5e56aa93d5f5f7603be475f/coverage-7.10.6-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:0f3f56e4cb573755e96a16501a98bf211f100463d70275759e73f3cbc00d4f27", size = 251206, upload-time = "2025-08-29T15:33:50.697Z" }, + { url = "https://files.pythonhosted.org/packages/e9/1f/9020135734184f439da85c70ea78194c2730e56c2d18aee6e8ff1719d50d/coverage-7.10.6-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:db4a1d897bbbe7339946ffa2fe60c10cc81c43fab8b062d3fcb84188688174a4", size = 252478, upload-time = "2025-08-29T15:33:52.303Z" }, + { url = "https://files.pythonhosted.org/packages/a4/a4/3d228f3942bb5a2051fde28c136eea23a761177dc4ff4ef54533164ce255/coverage-7.10.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d8fd7879082953c156d5b13c74aa6cca37f6a6f4747b39538504c3f9c63d043d", size = 250637, upload-time = "2025-08-29T15:33:53.67Z" }, + { url = "https://files.pythonhosted.org/packages/36/e3/293dce8cdb9a83de971637afc59b7190faad60603b40e32635cbd15fbf61/coverage-7.10.6-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:28395ca3f71cd103b8c116333fa9db867f3a3e1ad6a084aa3725ae002b6583bc", size = 248529, upload-time = "2025-08-29T15:33:55.022Z" }, + { url = "https://files.pythonhosted.org/packages/90/26/64eecfa214e80dd1d101e420cab2901827de0e49631d666543d0e53cf597/coverage-7.10.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:61c950fc33d29c91b9e18540e1aed7d9f6787cc870a3e4032493bbbe641d12fc", size = 250143, upload-time = "2025-08-29T15:33:56.386Z" }, + { url = "https://files.pythonhosted.org/packages/3e/70/bd80588338f65ea5b0d97e424b820fb4068b9cfb9597fbd91963086e004b/coverage-7.10.6-cp313-cp313-win32.whl", hash = "sha256:160c00a5e6b6bdf4e5984b0ef21fc860bc94416c41b7df4d63f536d17c38902e", size = 219770, upload-time = "2025-08-29T15:33:58.063Z" }, + { url = "https://files.pythonhosted.org/packages/a7/14/0b831122305abcc1060c008f6c97bbdc0a913ab47d65070a01dc50293c2b/coverage-7.10.6-cp313-cp313-win_amd64.whl", hash = "sha256:628055297f3e2aa181464c3808402887643405573eb3d9de060d81531fa79d32", size = 220566, upload-time = "2025-08-29T15:33:59.766Z" }, + { url = "https://files.pythonhosted.org/packages/83/c6/81a83778c1f83f1a4a168ed6673eeedc205afb562d8500175292ca64b94e/coverage-7.10.6-cp313-cp313-win_arm64.whl", hash = "sha256:df4ec1f8540b0bcbe26ca7dd0f541847cc8a108b35596f9f91f59f0c060bfdd2", size = 219195, upload-time = "2025-08-29T15:34:01.191Z" }, + { url = "https://files.pythonhosted.org/packages/d7/1c/ccccf4bf116f9517275fa85047495515add43e41dfe8e0bef6e333c6b344/coverage-7.10.6-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:c9a8b7a34a4de3ed987f636f71881cd3b8339f61118b1aa311fbda12741bff0b", size = 218059, upload-time = "2025-08-29T15:34:02.91Z" }, + { url = "https://files.pythonhosted.org/packages/92/97/8a3ceff833d27c7492af4f39d5da6761e9ff624831db9e9f25b3886ddbca/coverage-7.10.6-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:8dd5af36092430c2b075cee966719898f2ae87b636cefb85a653f1d0ba5d5393", size = 218287, upload-time = "2025-08-29T15:34:05.106Z" }, + { url = "https://files.pythonhosted.org/packages/92/d8/50b4a32580cf41ff0423777a2791aaf3269ab60c840b62009aec12d3970d/coverage-7.10.6-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:b0353b0f0850d49ada66fdd7d0c7cdb0f86b900bb9e367024fd14a60cecc1e27", size = 259625, upload-time = "2025-08-29T15:34:06.575Z" }, + { url = "https://files.pythonhosted.org/packages/7e/7e/6a7df5a6fb440a0179d94a348eb6616ed4745e7df26bf2a02bc4db72c421/coverage-7.10.6-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:d6b9ae13d5d3e8aeca9ca94198aa7b3ebbc5acfada557d724f2a1f03d2c0b0df", size = 261801, upload-time = "2025-08-29T15:34:08.006Z" }, + { url = "https://files.pythonhosted.org/packages/3a/4c/a270a414f4ed5d196b9d3d67922968e768cd971d1b251e1b4f75e9362f75/coverage-7.10.6-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:675824a363cc05781b1527b39dc2587b8984965834a748177ee3c37b64ffeafb", size = 264027, upload-time = "2025-08-29T15:34:09.806Z" }, + { url = "https://files.pythonhosted.org/packages/9c/8b/3210d663d594926c12f373c5370bf1e7c5c3a427519a8afa65b561b9a55c/coverage-7.10.6-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:692d70ea725f471a547c305f0d0fc6a73480c62fb0da726370c088ab21aed282", size = 261576, upload-time = "2025-08-29T15:34:11.585Z" }, + { url = "https://files.pythonhosted.org/packages/72/d0/e1961eff67e9e1dba3fc5eb7a4caf726b35a5b03776892da8d79ec895775/coverage-7.10.6-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:851430a9a361c7a8484a36126d1d0ff8d529d97385eacc8dfdc9bfc8c2d2cbe4", size = 259341, upload-time = "2025-08-29T15:34:13.159Z" }, + { url = "https://files.pythonhosted.org/packages/3a/06/d6478d152cd189b33eac691cba27a40704990ba95de49771285f34a5861e/coverage-7.10.6-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:d9369a23186d189b2fc95cc08b8160ba242057e887d766864f7adf3c46b2df21", size = 260468, upload-time = "2025-08-29T15:34:14.571Z" }, + { url = "https://files.pythonhosted.org/packages/ed/73/737440247c914a332f0b47f7598535b29965bf305e19bbc22d4c39615d2b/coverage-7.10.6-cp313-cp313t-win32.whl", hash = "sha256:92be86fcb125e9bda0da7806afd29a3fd33fdf58fba5d60318399adf40bf37d0", size = 220429, upload-time = "2025-08-29T15:34:16.394Z" }, + { url = "https://files.pythonhosted.org/packages/bd/76/b92d3214740f2357ef4a27c75a526eb6c28f79c402e9f20a922c295c05e2/coverage-7.10.6-cp313-cp313t-win_amd64.whl", hash = "sha256:6b3039e2ca459a70c79523d39347d83b73f2f06af5624905eba7ec34d64d80b5", size = 221493, upload-time = "2025-08-29T15:34:17.835Z" }, + { url = "https://files.pythonhosted.org/packages/fc/8e/6dcb29c599c8a1f654ec6cb68d76644fe635513af16e932d2d4ad1e5ac6e/coverage-7.10.6-cp313-cp313t-win_arm64.whl", hash = "sha256:3fb99d0786fe17b228eab663d16bee2288e8724d26a199c29325aac4b0319b9b", size = 219757, upload-time = "2025-08-29T15:34:19.248Z" }, + { url = "https://files.pythonhosted.org/packages/d3/aa/76cf0b5ec00619ef208da4689281d48b57f2c7fde883d14bf9441b74d59f/coverage-7.10.6-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:6008a021907be8c4c02f37cdc3ffb258493bdebfeaf9a839f9e71dfdc47b018e", size = 217331, upload-time = "2025-08-29T15:34:20.846Z" }, + { url = "https://files.pythonhosted.org/packages/65/91/8e41b8c7c505d398d7730206f3cbb4a875a35ca1041efc518051bfce0f6b/coverage-7.10.6-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:5e75e37f23eb144e78940b40395b42f2321951206a4f50e23cfd6e8a198d3ceb", size = 217607, upload-time = "2025-08-29T15:34:22.433Z" }, + { url = "https://files.pythonhosted.org/packages/87/7f/f718e732a423d442e6616580a951b8d1ec3575ea48bcd0e2228386805e79/coverage-7.10.6-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:0f7cb359a448e043c576f0da00aa8bfd796a01b06aa610ca453d4dde09cc1034", size = 248663, upload-time = "2025-08-29T15:34:24.425Z" }, + { url = "https://files.pythonhosted.org/packages/e6/52/c1106120e6d801ac03e12b5285e971e758e925b6f82ee9b86db3aa10045d/coverage-7.10.6-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:c68018e4fc4e14b5668f1353b41ccf4bc83ba355f0e1b3836861c6f042d89ac1", size = 251197, upload-time = "2025-08-29T15:34:25.906Z" }, + { url = "https://files.pythonhosted.org/packages/3d/ec/3a8645b1bb40e36acde9c0609f08942852a4af91a937fe2c129a38f2d3f5/coverage-7.10.6-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:cd4b2b0707fc55afa160cd5fc33b27ccbf75ca11d81f4ec9863d5793fc6df56a", size = 252551, upload-time = "2025-08-29T15:34:27.337Z" }, + { url = "https://files.pythonhosted.org/packages/a1/70/09ecb68eeb1155b28a1d16525fd3a9b65fbe75337311a99830df935d62b6/coverage-7.10.6-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:4cec13817a651f8804a86e4f79d815b3b28472c910e099e4d5a0e8a3b6a1d4cb", size = 250553, upload-time = "2025-08-29T15:34:29.065Z" }, + { url = "https://files.pythonhosted.org/packages/c6/80/47df374b893fa812e953b5bc93dcb1427a7b3d7a1a7d2db33043d17f74b9/coverage-7.10.6-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:f2a6a8e06bbda06f78739f40bfb56c45d14eb8249d0f0ea6d4b3d48e1f7c695d", size = 248486, upload-time = "2025-08-29T15:34:30.897Z" }, + { url = "https://files.pythonhosted.org/packages/4a/65/9f98640979ecee1b0d1a7164b589de720ddf8100d1747d9bbdb84be0c0fb/coverage-7.10.6-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:081b98395ced0d9bcf60ada7661a0b75f36b78b9d7e39ea0790bb4ed8da14747", size = 249981, upload-time = "2025-08-29T15:34:32.365Z" }, + { url = "https://files.pythonhosted.org/packages/1f/55/eeb6603371e6629037f47bd25bef300387257ed53a3c5fdb159b7ac8c651/coverage-7.10.6-cp314-cp314-win32.whl", hash = "sha256:6937347c5d7d069ee776b2bf4e1212f912a9f1f141a429c475e6089462fcecc5", size = 220054, upload-time = "2025-08-29T15:34:34.124Z" }, + { url = "https://files.pythonhosted.org/packages/15/d1/a0912b7611bc35412e919a2cd59ae98e7ea3b475e562668040a43fb27897/coverage-7.10.6-cp314-cp314-win_amd64.whl", hash = "sha256:adec1d980fa07e60b6ef865f9e5410ba760e4e1d26f60f7e5772c73b9a5b0713", size = 220851, upload-time = "2025-08-29T15:34:35.651Z" }, + { url = "https://files.pythonhosted.org/packages/ef/2d/11880bb8ef80a45338e0b3e0725e4c2d73ffbb4822c29d987078224fd6a5/coverage-7.10.6-cp314-cp314-win_arm64.whl", hash = "sha256:a80f7aef9535442bdcf562e5a0d5a5538ce8abe6bb209cfbf170c462ac2c2a32", size = 219429, upload-time = "2025-08-29T15:34:37.16Z" }, + { url = "https://files.pythonhosted.org/packages/83/c0/1f00caad775c03a700146f55536ecd097a881ff08d310a58b353a1421be0/coverage-7.10.6-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:0de434f4fbbe5af4fa7989521c655c8c779afb61c53ab561b64dcee6149e4c65", size = 218080, upload-time = "2025-08-29T15:34:38.919Z" }, + { url = "https://files.pythonhosted.org/packages/a9/c4/b1c5d2bd7cc412cbeb035e257fd06ed4e3e139ac871d16a07434e145d18d/coverage-7.10.6-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:6e31b8155150c57e5ac43ccd289d079eb3f825187d7c66e755a055d2c85794c6", size = 218293, upload-time = "2025-08-29T15:34:40.425Z" }, + { url = "https://files.pythonhosted.org/packages/3f/07/4468d37c94724bf6ec354e4ec2f205fda194343e3e85fd2e59cec57e6a54/coverage-7.10.6-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:98cede73eb83c31e2118ae8d379c12e3e42736903a8afcca92a7218e1f2903b0", size = 259800, upload-time = "2025-08-29T15:34:41.996Z" }, + { url = "https://files.pythonhosted.org/packages/82/d8/f8fb351be5fee31690cd8da768fd62f1cfab33c31d9f7baba6cd8960f6b8/coverage-7.10.6-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:f863c08f4ff6b64fa8045b1e3da480f5374779ef187f07b82e0538c68cb4ff8e", size = 261965, upload-time = "2025-08-29T15:34:43.61Z" }, + { url = "https://files.pythonhosted.org/packages/e8/70/65d4d7cfc75c5c6eb2fed3ee5cdf420fd8ae09c4808723a89a81d5b1b9c3/coverage-7.10.6-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2b38261034fda87be356f2c3f42221fdb4171c3ce7658066ae449241485390d5", size = 264220, upload-time = "2025-08-29T15:34:45.387Z" }, + { url = "https://files.pythonhosted.org/packages/98/3c/069df106d19024324cde10e4ec379fe2fb978017d25e97ebee23002fbadf/coverage-7.10.6-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:0e93b1476b79eae849dc3872faeb0bf7948fd9ea34869590bc16a2a00b9c82a7", size = 261660, upload-time = "2025-08-29T15:34:47.288Z" }, + { url = "https://files.pythonhosted.org/packages/fc/8a/2974d53904080c5dc91af798b3a54a4ccb99a45595cc0dcec6eb9616a57d/coverage-7.10.6-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:ff8a991f70f4c0cf53088abf1e3886edcc87d53004c7bb94e78650b4d3dac3b5", size = 259417, upload-time = "2025-08-29T15:34:48.779Z" }, + { url = "https://files.pythonhosted.org/packages/30/38/9616a6b49c686394b318974d7f6e08f38b8af2270ce7488e879888d1e5db/coverage-7.10.6-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:ac765b026c9f33044419cbba1da913cfb82cca1b60598ac1c7a5ed6aac4621a0", size = 260567, upload-time = "2025-08-29T15:34:50.718Z" }, + { url = "https://files.pythonhosted.org/packages/76/16/3ed2d6312b371a8cf804abf4e14895b70e4c3491c6e53536d63fd0958a8d/coverage-7.10.6-cp314-cp314t-win32.whl", hash = "sha256:441c357d55f4936875636ef2cfb3bee36e466dcf50df9afbd398ce79dba1ebb7", size = 220831, upload-time = "2025-08-29T15:34:52.653Z" }, + { url = "https://files.pythonhosted.org/packages/d5/e5/d38d0cb830abede2adb8b147770d2a3d0e7fecc7228245b9b1ae6c24930a/coverage-7.10.6-cp314-cp314t-win_amd64.whl", hash = "sha256:073711de3181b2e204e4870ac83a7c4853115b42e9cd4d145f2231e12d670930", size = 221950, upload-time = "2025-08-29T15:34:54.212Z" }, + { url = "https://files.pythonhosted.org/packages/f4/51/e48e550f6279349895b0ffcd6d2a690e3131ba3a7f4eafccc141966d4dea/coverage-7.10.6-cp314-cp314t-win_arm64.whl", hash = "sha256:137921f2bac5559334ba66122b753db6dc5d1cf01eb7b64eb412bb0d064ef35b", size = 219969, upload-time = "2025-08-29T15:34:55.83Z" }, + { url = "https://files.pythonhosted.org/packages/44/0c/50db5379b615854b5cf89146f8f5bd1d5a9693d7f3a987e269693521c404/coverage-7.10.6-py3-none-any.whl", hash = "sha256:92c4ecf6bf11b2e85fd4d8204814dc26e6a19f0c9d938c207c5cb0eadfcabbe3", size = 208986, upload-time = "2025-08-29T15:35:14.506Z" }, +] + [[package]] name = "cryptography" version = "45.0.6" @@ -738,6 +791,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/20/b0/36bd937216ec521246249be3bf9855081de4c5e06a0c9b4219dbeda50373/importlib_metadata-8.7.0-py3-none-any.whl", hash = "sha256:e5dd1551894c77868a30651cef00984d50e1002d06942a7101d34870c5f02afd", size = 27656, upload-time = "2025-04-27T15:29:00.214Z" }, ] +[[package]] +name = "iniconfig" +version = "2.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f2/97/ebf4da567aa6827c909642694d71c9fcf53e5b504f2d96afea02718862f3/iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7", size = 4793, upload-time = "2025-03-19T20:09:59.721Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050, upload-time = "2025-03-19T20:10:01.071Z" }, +] + [[package]] name = "jinja2" version = "3.1.6" @@ -1421,6 +1483,10 @@ dependencies = [ { name = "opensearch-py", extra = ["async"] }, { name = "psutil" }, { name = "pyjwt" }, + { name = "pytest" }, + { name = "pytest-asyncio" }, + { name = "pytest-cov" }, + { name = "pytest-mock" }, { name = "python-dotenv" }, { name = "python-multipart" }, { name = "rich" }, @@ -1448,6 +1514,10 @@ requires-dist = [ { name = "opensearch-py", extras = ["async"], specifier = ">=3.0.0" }, { name = "psutil", specifier = ">=7.0.0" }, { name = "pyjwt", specifier = ">=2.8.0" }, + { name = "pytest", specifier = ">=8.0.0" }, + { name = "pytest-asyncio", specifier = ">=0.21.0" }, + { name = "pytest-cov", specifier = ">=4.0.0" }, + { name = "pytest-mock", specifier = ">=3.12.0" }, { name = "python-dotenv", specifier = ">=1.0.0" }, { name = "python-multipart", specifier = ">=0.0.20" }, { name = "rich", specifier = ">=13.0.0" }, @@ -1831,6 +1901,60 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e1/6b/2706497c86e8d69fb76afe5ea857fe1794621aa0f3b1d863feb953fe0f22/pypdfium2-4.30.1-py3-none-win_arm64.whl", hash = "sha256:c2b6d63f6d425d9416c08d2511822b54b8e3ac38e639fc41164b1d75584b3a8c", size = 2814810, upload-time = "2024-12-19T19:28:09.857Z" }, ] +[[package]] +name = "pytest" +version = "8.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "iniconfig" }, + { name = "packaging" }, + { name = "pluggy" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a3/5c/00a0e072241553e1a7496d638deababa67c5058571567b92a7eaa258397c/pytest-8.4.2.tar.gz", hash = "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01", size = 1519618, upload-time = "2025-09-04T14:34:22.711Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a8/a4/20da314d277121d6534b3a980b29035dcd51e6744bd79075a6ce8fa4eb8d/pytest-8.4.2-py3-none-any.whl", hash = "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79", size = 365750, upload-time = "2025-09-04T14:34:20.226Z" }, +] + +[[package]] +name = "pytest-asyncio" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/4e/51/f8794af39eeb870e87a8c8068642fc07bce0c854d6865d7dd0f2a9d338c2/pytest_asyncio-1.1.0.tar.gz", hash = "sha256:796aa822981e01b68c12e4827b8697108f7205020f24b5793b3c41555dab68ea", size = 46652, upload-time = "2025-07-16T04:29:26.393Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/9d/bf86eddabf8c6c9cb1ea9a869d6873b46f105a5d292d3a6f7071f5b07935/pytest_asyncio-1.1.0-py3-none-any.whl", hash = "sha256:5fe2d69607b0bd75c656d1211f969cadba035030156745ee09e7d71740e58ecf", size = 15157, upload-time = "2025-07-16T04:29:24.929Z" }, +] + +[[package]] +name = "pytest-cov" +version = "7.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "coverage" }, + { name = "pluggy" }, + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5e/f7/c933acc76f5208b3b00089573cf6a2bc26dc80a8aece8f52bb7d6b1855ca/pytest_cov-7.0.0.tar.gz", hash = "sha256:33c97eda2e049a0c5298e91f519302a1334c26ac65c1a483d6206fd458361af1", size = 54328, upload-time = "2025-09-09T10:57:02.113Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ee/49/1377b49de7d0c1ce41292161ea0f721913fa8722c19fb9c1e3aa0367eecb/pytest_cov-7.0.0-py3-none-any.whl", hash = "sha256:3b8e9558b16cc1479da72058bdecf8073661c7f57f7d3c5f22a1c23507f2d861", size = 22424, upload-time = "2025-09-09T10:57:00.695Z" }, +] + +[[package]] +name = "pytest-mock" +version = "3.15.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/61/99/3323ee5c16b3637b4d941c362182d3e749c11e400bea31018c42219f3a98/pytest_mock-3.15.0.tar.gz", hash = "sha256:ab896bd190316b9d5d87b277569dfcdf718b2d049a2ccff5f7aca279c002a1cf", size = 33838, upload-time = "2025-09-04T20:57:48.679Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2b/b3/7fefc43fb706380144bcd293cc6e446e6f637ddfa8b83f48d1734156b529/pytest_mock-3.15.0-py3-none-any.whl", hash = "sha256:ef2219485fb1bd256b00e7ad7466ce26729b30eadfc7cbcdb4fa9a92ca68db6f", size = 10050, upload-time = "2025-09-04T20:57:47.274Z" }, +] + [[package]] name = "python-bidi" version = "0.6.6" From 4ca3f179745546d21b2029b45f2a276731f81211 Mon Sep 17 00:00:00 2001 From: phact Date: Thu, 11 Sep 2025 16:25:26 -0400 Subject: [PATCH 08/58] unnecessary comment --- src/main.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main.py b/src/main.py index bb745451..873dd458 100644 --- a/src/main.py +++ b/src/main.py @@ -450,7 +450,6 @@ async def create_app(): # Create route handlers with service dependencies injected routes = [ - # Langflow direct upload/ingest endpoints removed in favor of router (/router/upload_ingest) Route( "/langflow/files", require_auth(services["session_manager"])( From 2ef560ca7f13c54aaf8f8e85712bbf8f227b3f34 Mon Sep 17 00:00:00 2001 From: phact Date: Fri, 12 Sep 2025 11:54:08 -0400 Subject: [PATCH 09/58] simplify makefile --- Makefile | 63 +++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 39 insertions(+), 24 deletions(-) diff --git a/Makefile b/Makefile index 6ac03b93..e9c0367d 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,17 @@ # OpenRAG Development Makefile # Provides easy commands for development workflow -.PHONY: help dev dev-cpu dev-local infra stop clean build logs shell-backend shell-frontend install test test-integration test-unit test-ingest test-search test-coverage backend frontend install-be install-fe build-be build-fe logs-be logs-fe logs-lf logs-os shell-be shell-lf shell-os restart status health db-reset flow-upload quick setup +# Load variables from .env if present so `make` commands pick them up +ifneq (,$(wildcard .env)) + include .env + # Export all simple KEY=VALUE pairs to the environment for child processes + export $(shell sed -n 's/^\([A-Za-z_][A-Za-z0-9_]*\)=.*/\1/p' .env) +endif + +.PHONY: help dev dev-cpu dev-local infra stop clean build logs shell-backend shell-frontend install \ + test test-integration test-ci \ + backend frontend install-be install-fe build-be build-fe logs-be logs-fe logs-lf logs-os \ + shell-be shell-lf shell-os restart status health db-reset flow-upload quick setup # Default target help: @@ -32,12 +42,9 @@ help: @echo " shell-lf - Shell into langflow container" @echo "" @echo "Testing:" - @echo " test - Run all backend tests" + @echo " test - Run all backend tests" @echo " test-integration - Run integration tests (requires infra)" - @echo " test-unit - Run unit tests only" - @echo " test-ingest - Test file ingestion flows" - @echo " test-search - Test search functionality" - @echo " test-coverage - Run tests with coverage report" + @echo " test-ci - Start infra, run integration tests, tear down" @echo " lint - Run linting checks" @echo "" @@ -174,21 +181,29 @@ test-integration: @echo "๐Ÿ’ก Make sure to run 'make infra' first!" uv run pytest tests/integration/ -v -test-unit: - @echo "๐Ÿงช Running unit tests..." - uv run pytest tests/unit/ -v - -test-ingest: - @echo "๐Ÿงช Testing file ingestion flows..." - uv run pytest tests/integration/test_file_ingest.py -v - -test-search: - @echo "๐Ÿงช Testing search functionality..." - uv run pytest tests/integration/test_search_flow.py -v - -test-coverage: - @echo "๐Ÿงช Running tests with coverage report..." - uv run pytest tests/ --cov=src --cov-report=term-missing --cov-report=html:htmlcov +# CI-friendly integration test target: brings up infra, waits, runs tests, tears down +test-ci: + @set -e; \ + echo "๐Ÿš€ Starting infra (OpenSearch + Dashboards + Langflow)"; \ + make infra; \ + echo "โณ Waiting for OpenSearch..."; \ + for i in $$(seq 1 60); do \ + curl -k -s https://localhost:9200 -u admin:$${OPENSEARCH_PASSWORD} >/dev/null 2>&1 && break || sleep 2; \ + done; \ + echo "โณ Waiting for Langflow..."; \ + for i in $$(seq 1 60); do \ + curl -s http://localhost:7860/ >/dev/null 2>&1 && break || sleep 2; \ + done; \ + echo "๐Ÿงช Running integration tests"; \ + LOG_LEVEL=$${LOG_LEVEL:-DEBUG} \ + GOOGLE_OAUTH_CLIENT_ID="" \ + GOOGLE_OAUTH_CLIENT_SECRET="" \ + OPENSEARCH_HOST=localhost OPENSEARCH_PORT=9200 \ + OPENSEARCH_USERNAME=admin OPENSEARCH_PASSWORD=$${OPENSEARCH_PASSWORD} \ + DISABLE_STARTUP_INGEST=$${DISABLE_STARTUP_INGEST:-true} \ + uv run pytest tests/integration -vv -s -o log_cli=true --log-cli-level=DEBUG; \ + echo "๐Ÿงน Tearing down infra"; \ + docker compose down -v || true lint: @echo "๐Ÿ” Running linting checks..." @@ -204,13 +219,13 @@ health: @echo "๐Ÿฅ Health check:" @echo "Backend: $$(curl -s http://localhost:8000/health 2>/dev/null || echo 'Not responding')" @echo "Langflow: $$(curl -s http://localhost:7860/health 2>/dev/null || echo 'Not responding')" - @echo "OpenSearch: $$(curl -s -k -u admin:$(shell grep OPENSEARCH_PASSWORD .env | cut -d= -f2) https://localhost:9200 2>/dev/null | jq -r .tagline 2>/dev/null || echo 'Not responding')" + @echo "OpenSearch: $$(curl -s -k -u admin:$${OPENSEARCH_PASSWORD} https://localhost:9200 2>/dev/null | jq -r .tagline 2>/dev/null || echo 'Not responding')" # Database operations db-reset: @echo "๐Ÿ—„๏ธ Resetting OpenSearch indices..." - curl -X DELETE "http://localhost:9200/documents" -u admin:$$(grep OPENSEARCH_PASSWORD .env | cut -d= -f2) || true - curl -X DELETE "http://localhost:9200/knowledge_filters" -u admin:$$(grep OPENSEARCH_PASSWORD .env | cut -d= -f2) || true + curl -X DELETE "http://localhost:9200/documents" -u admin:$${OPENSEARCH_PASSWORD} || true + curl -X DELETE "http://localhost:9200/knowledge_filters" -u admin:$${OPENSEARCH_PASSWORD} || true @echo "Indices reset. Restart backend to recreate." # Flow management From e23ed258c932c72ca67518fcafaf0665c6156a7c Mon Sep 17 00:00:00 2001 From: phact Date: Fri, 12 Sep 2025 11:54:28 -0400 Subject: [PATCH 10/58] improve tests --- src/config/settings.py | 23 +++ tests/integration/test_api_endpoints.py | 198 +++++++++++++---------- tests/integration/test_startup_ingest.py | 81 +++++----- 3 files changed, 172 insertions(+), 130 deletions(-) diff --git a/src/config/settings.py b/src/config/settings.py index ace9d5cb..dc9a6e23 100644 --- a/src/config/settings.py +++ b/src/config/settings.py @@ -297,6 +297,29 @@ class AppClients: return self + async def close(self): + """Close all client connections""" + try: + if hasattr(self, 'opensearch') and self.opensearch: + await self.opensearch.close() + self.opensearch = None + except Exception as e: + logger.warning("Error closing OpenSearch client", error=str(e)) + + try: + if hasattr(self, 'langflow_http_client') and self.langflow_http_client: + await self.langflow_http_client.aclose() + self.langflow_http_client = None + except Exception as e: + logger.warning("Error closing Langflow HTTP client", error=str(e)) + + try: + if hasattr(self, 'patched_async_client') and self.patched_async_client: + await self.patched_async_client.close() + self.patched_async_client = None + except Exception as e: + logger.warning("Error closing OpenAI client", error=str(e)) + async def ensure_langflow_client(self): """Ensure Langflow client exists; try to generate key and create client lazily.""" if self.langflow_client is not None: diff --git a/tests/integration/test_api_endpoints.py b/tests/integration/test_api_endpoints.py index e2ae3c18..60810563 100644 --- a/tests/integration/test_api_endpoints.py +++ b/tests/integration/test_api_endpoints.py @@ -60,79 +60,89 @@ async def test_upload_and_search_endpoint(tmp_path: Path, disable_langflow_inges await clients.initialize() try: await clients.opensearch.indices.delete(index=INDEX_NAME) + # Wait for deletion to complete + await asyncio.sleep(1) except Exception: pass app = await create_app() # Manually run startup tasks since httpx ASGI transport here doesn't manage lifespan await startup_tasks(app.state.services) + + # Verify index is truly empty after startup + try: + count_response = await clients.opensearch.count(index=INDEX_NAME) + doc_count = count_response.get('count', 0) + assert doc_count == 0, f"Index should be empty after startup but contains {doc_count} documents" + except Exception as e: + # If count fails, the index might not exist yet, which is fine + pass transport = httpx.ASGITransport(app=app) - async with httpx.AsyncClient(transport=transport, base_url="http://testserver") as client: - # Wait for app + OpenSearch readiness using existing endpoints - await wait_for_service_ready(client) - - # Create a temporary markdown file to upload - file_path = tmp_path / "endpoint_test_doc.md" - file_text = ( - "# Single Test Document\n\n" - "This is a test document about OpenRAG testing framework. " - "The content should be indexed and searchable in OpenSearch after processing." - ) - file_path.write_text(file_text) - - # POST via router (multipart) - files = { - "file": ( - file_path.name, - file_path.read_bytes(), - "text/markdown", - ) - } - upload_resp = await client.post("/upload", files=files) - body = upload_resp.json() - # Router now returns 201 + task_id (async) regardless of mode - assert upload_resp.status_code == 201, upload_resp.text - assert isinstance(body.get("task_id"), str) - - # Poll search for the specific content until it's indexed - async def _wait_for_indexed(timeout_s: float = 30.0): - deadline = asyncio.get_event_loop().time() + timeout_s - while asyncio.get_event_loop().time() < deadline: - resp = await client.post( - "/search", - json={"query": "OpenRAG testing framework", "limit": 5}, - ) - if resp.status_code == 200 and resp.json().get("results"): - return resp - await asyncio.sleep(0.5) - return resp - - search_resp = await _wait_for_indexed() - - # POST /search - assert search_resp.status_code == 200, search_resp.text - search_body = search_resp.json() - - # Basic shape and at least one hit - assert isinstance(search_body.get("results"), list) - assert len(search_body["results"]) >= 0 - # When hits exist, confirm our phrase is present in top result content - if search_body["results"]: - top = search_body["results"][0] - assert "text" in top or "content" in top - text = top.get("text") or top.get("content") - assert isinstance(text, str) - assert "testing" in text.lower() - # Explicitly close global clients to avoid aiohttp warnings - from src.config.settings import clients try: - if getattr(clients, "opensearch", None): - await clients.opensearch.close() - if getattr(clients, "langflow_http_client", None): - await clients.langflow_http_client.aclose() - except Exception: - pass + async with httpx.AsyncClient(transport=transport, base_url="http://testserver") as client: + # Wait for app + OpenSearch readiness using existing endpoints + await wait_for_service_ready(client) + + # Create a temporary markdown file to upload + file_path = tmp_path / "endpoint_test_doc.md" + file_text = ( + "# Single Test Document\n\n" + "This is a test document about OpenRAG testing framework. " + "The content should be indexed and searchable in OpenSearch after processing." + ) + file_path.write_text(file_text) + + # POST via router (multipart) + files = { + "file": ( + file_path.name, + file_path.read_bytes(), + "text/markdown", + ) + } + upload_resp = await client.post("/upload", files=files) + body = upload_resp.json() + # Router now returns 201 + task_id (async) regardless of mode + assert upload_resp.status_code == 201, upload_resp.text + assert isinstance(body.get("task_id"), str) + + # Poll search for the specific content until it's indexed + async def _wait_for_indexed(timeout_s: float = 30.0): + deadline = asyncio.get_event_loop().time() + timeout_s + while asyncio.get_event_loop().time() < deadline: + resp = await client.post( + "/search", + json={"query": "OpenRAG testing framework", "limit": 5}, + ) + if resp.status_code == 200 and resp.json().get("results"): + return resp + await asyncio.sleep(0.5) + return resp + + search_resp = await _wait_for_indexed() + + # POST /search + assert search_resp.status_code == 200, search_resp.text + search_body = search_resp.json() + + # Basic shape and at least one hit + assert isinstance(search_body.get("results"), list) + assert len(search_body["results"]) >= 0 + # When hits exist, confirm our phrase is present in top result content + if search_body["results"]: + top = search_body["results"][0] + assert "text" in top or "content" in top + text = top.get("text") or top.get("content") + assert isinstance(text, str) + assert "testing" in text.lower() + finally: + # Explicitly close global clients to avoid aiohttp warnings + from src.config.settings import clients + try: + await clients.close() + except Exception: + pass @pytest.mark.parametrize("disable_langflow_ingest", [True, False]) @@ -159,35 +169,45 @@ async def test_router_upload_ingest_traditional(tmp_path: Path, disable_langflow await clients.initialize() try: await clients.opensearch.indices.delete(index=INDEX_NAME) + # Wait for deletion to complete + await asyncio.sleep(1) except Exception: pass app = await create_app() await startup_tasks(app.state.services) - transport = httpx.ASGITransport(app=app) - async with httpx.AsyncClient(transport=transport, base_url="http://testserver") as client: - await wait_for_service_ready(client) - - file_path = tmp_path / "router_test_doc.md" - file_path.write_text("# Router Test\n\nThis file validates the upload router.") - - files = { - "file": ( - file_path.name, - file_path.read_bytes(), - "text/markdown", - ) - } - - resp = await client.post("/upload", files=files) - data = resp.json() - assert resp.status_code == 201, resp.text - assert isinstance(data.get("task_id"), str) - from src.config.settings import clients + + # Verify index is truly empty after startup try: - if getattr(clients, "opensearch", None): - await clients.opensearch.close() - if getattr(clients, "langflow_http_client", None): - await clients.langflow_http_client.aclose() - except Exception: + count_response = await clients.opensearch.count(index=INDEX_NAME) + doc_count = count_response.get('count', 0) + assert doc_count == 0, f"Index should be empty after startup but contains {doc_count} documents" + except Exception as e: + # If count fails, the index might not exist yet, which is fine pass + transport = httpx.ASGITransport(app=app) + try: + async with httpx.AsyncClient(transport=transport, base_url="http://testserver") as client: + await wait_for_service_ready(client) + + file_path = tmp_path / "router_test_doc.md" + file_path.write_text("# Router Test\n\nThis file validates the upload router.") + + files = { + "file": ( + file_path.name, + file_path.read_bytes(), + "text/markdown", + ) + } + + resp = await client.post("/upload", files=files) + data = resp.json() + assert resp.status_code == 201, resp.text + assert isinstance(data.get("task_id"), str) + finally: + from src.config.settings import clients + try: + await clients.close() + except Exception: + pass diff --git a/tests/integration/test_startup_ingest.py b/tests/integration/test_startup_ingest.py index 5ce62a94..436c4d28 100644 --- a/tests/integration/test_startup_ingest.py +++ b/tests/integration/test_startup_ingest.py @@ -70,45 +70,44 @@ async def test_startup_ingest_creates_task(disable_langflow_ingest: bool): await startup_tasks(app.state.services) transport = httpx.ASGITransport(app=app) - async with httpx.AsyncClient(transport=transport, base_url="http://testserver") as client: - await wait_for_ready(client) - - expected_files = count_files_in_documents() - - # Poll /tasks until we see at least one startup ingest task - async def _wait_for_task(timeout_s: float = 60.0): - deadline = asyncio.get_event_loop().time() + timeout_s - last = None - while asyncio.get_event_loop().time() < deadline: - resp = await client.get("/tasks") - if resp.status_code == 200: - data = resp.json() - last = data - tasks = data.get("tasks") if isinstance(data, dict) else None - if isinstance(tasks, list) and len(tasks) > 0: - return tasks - await asyncio.sleep(0.5) - return last.get("tasks") if isinstance(last, dict) else last - - tasks = await _wait_for_task() - if expected_files == 0: - return # Nothing to do - if not (isinstance(tasks, list) and len(tasks) > 0): - # Fallback: verify that documents were indexed as a sign of startup ingest - sr = await client.post("/search", json={"query": "*", "limit": 1}) - assert sr.status_code == 200, sr.text - total = sr.json().get("total") - assert isinstance(total, int) and total >= 0, "Startup ingest did not index documents" - return - newest = tasks[0] - assert "task_id" in newest - assert newest.get("total_files") == expected_files - # Explicitly close global clients to avoid aiohttp warnings - from src.config.settings import clients try: - if getattr(clients, "opensearch", None): - await clients.opensearch.close() - if getattr(clients, "langflow_http_client", None): - await clients.langflow_http_client.aclose() - except Exception: - pass + async with httpx.AsyncClient(transport=transport, base_url="http://testserver") as client: + await wait_for_ready(client) + + expected_files = count_files_in_documents() + + # Poll /tasks until we see at least one startup ingest task + async def _wait_for_task(timeout_s: float = 60.0): + deadline = asyncio.get_event_loop().time() + timeout_s + last = None + while asyncio.get_event_loop().time() < deadline: + resp = await client.get("/tasks") + if resp.status_code == 200: + data = resp.json() + last = data + tasks = data.get("tasks") if isinstance(data, dict) else None + if isinstance(tasks, list) and len(tasks) > 0: + return tasks + await asyncio.sleep(0.5) + return last.get("tasks") if isinstance(last, dict) else last + + tasks = await _wait_for_task() + if expected_files == 0: + return # Nothing to do + if not (isinstance(tasks, list) and len(tasks) > 0): + # Fallback: verify that documents were indexed as a sign of startup ingest + sr = await client.post("/search", json={"query": "*", "limit": 1}) + assert sr.status_code == 200, sr.text + total = sr.json().get("total") + assert isinstance(total, int) and total >= 0, "Startup ingest did not index documents" + return + newest = tasks[0] + assert "task_id" in newest + assert newest.get("total_files") == expected_files + finally: + # Explicitly close global clients to avoid aiohttp warnings + from src.config.settings import clients + try: + await clients.close() + except Exception: + pass From 33911052a6bf83ce43736894966578663c50c87a Mon Sep 17 00:00:00 2001 From: phact Date: Fri, 12 Sep 2025 11:55:15 -0400 Subject: [PATCH 11/58] add integration test action --- .github/workflows/test-integration.yml | 45 ++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 .github/workflows/test-integration.yml diff --git a/.github/workflows/test-integration.yml b/.github/workflows/test-integration.yml new file mode 100644 index 00000000..8c4f971c --- /dev/null +++ b/.github/workflows/test-integration.yml @@ -0,0 +1,45 @@ +name: Integration Tests + +on: + pull_request: + push: + branches: + - main + - develop + +jobs: + tests: + runs-on: ubuntu-latest + env: + # Prefer repository/environment variable first, then secret, then a sane fallback + OPENSEARCH_PASSWORD: ${{ vars.OPENSEARCH_PASSWORD || secrets.OPENSEARCH_PASSWORD || 'OpenRag#2025!' }} + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up UV + uses: astral-sh/setup-uv@v3 + with: + version: latest + + - name: Python version + run: uv python install 3.13 + + - name: Install dependencies + run: uv sync + + - name: Run integration tests + env: + OPENSEARCH_HOST: localhost + OPENSEARCH_PORT: 9200 + OPENSEARCH_USERNAME: admin + OPENSEARCH_PASSWORD: ${{ env.OPENSEARCH_PASSWORD }} + LOG_LEVEL: DEBUG + # Force no-auth mode so tests bypass OAuth + GOOGLE_OAUTH_CLIENT_ID: "" + GOOGLE_OAUTH_CLIENT_SECRET: "" + # Disable startup ingest noise unless a test enables it + DISABLE_STARTUP_INGEST: "true" + run: | + make test-ci From 952dc6dc92c10329accc2be95340f00d16b72c85 Mon Sep 17 00:00:00 2001 From: phact Date: Fri, 12 Sep 2025 11:56:27 -0400 Subject: [PATCH 12/58] ci branches trigger --- .github/workflows/test-integration.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/test-integration.yml b/.github/workflows/test-integration.yml index 8c4f971c..75b75ed3 100644 --- a/.github/workflows/test-integration.yml +++ b/.github/workflows/test-integration.yml @@ -5,7 +5,6 @@ on: push: branches: - main - - develop jobs: tests: From 57f893b622af55552a8b52af78ad8e223fabcd6a Mon Sep 17 00:00:00 2001 From: phact Date: Fri, 12 Sep 2025 12:33:02 -0400 Subject: [PATCH 13/58] ci node cleanup --- .github/workflows/test-integration.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/test-integration.yml b/.github/workflows/test-integration.yml index 75b75ed3..19bacefd 100644 --- a/.github/workflows/test-integration.yml +++ b/.github/workflows/test-integration.yml @@ -14,6 +14,13 @@ jobs: OPENSEARCH_PASSWORD: ${{ vars.OPENSEARCH_PASSWORD || secrets.OPENSEARCH_PASSWORD || 'OpenRag#2025!' }} steps: + - run: df -h + - name: "node-cleanup" + run: | + sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /opt/hostedtoolcache/CodeQL + sudo docker image prune --all --force + sudo docker builder prune -a + - run: df -h - name: Checkout uses: actions/checkout@v4 From 463bb48222baab5be3ef79e4d3e5d3b5cb03fbfe Mon Sep 17 00:00:00 2001 From: phact Date: Fri, 12 Sep 2025 13:02:56 -0400 Subject: [PATCH 14/58] devel and torch dependencies optional --- .github/workflows/test-integration.yml | 2 +- Dockerfile.backend | 2 +- Makefile | 4 +++- pyproject.toml | 27 +++++++++++++++++--------- 4 files changed, 23 insertions(+), 12 deletions(-) diff --git a/.github/workflows/test-integration.yml b/.github/workflows/test-integration.yml index 19bacefd..8b1a0b74 100644 --- a/.github/workflows/test-integration.yml +++ b/.github/workflows/test-integration.yml @@ -33,7 +33,7 @@ jobs: run: uv python install 3.13 - name: Install dependencies - run: uv sync + run: uv sync --group dev - name: Run integration tests env: diff --git a/Dockerfile.backend b/Dockerfile.backend index 5d9d84f4..d314eefe 100644 --- a/Dockerfile.backend +++ b/Dockerfile.backend @@ -18,7 +18,7 @@ WORKDIR /app # Copy Python dependencies COPY pyproject.toml uv.lock ./ -RUN uv sync +RUN uv sync --extra torch-cu128 # Copy sample document and warmup script for docling COPY documents/warmup_ocr.pdf ./ diff --git a/Makefile b/Makefile index e9c0367d..eeab5a12 100644 --- a/Makefile +++ b/Makefile @@ -118,7 +118,7 @@ install: install-be install-fe install-be: @echo "๐Ÿ“ฆ Installing backend dependencies..." - uv sync + uv sync --extra torch-cu128 install-fe: @echo "๐Ÿ“ฆ Installing frontend dependencies..." @@ -184,6 +184,8 @@ test-integration: # CI-friendly integration test target: brings up infra, waits, runs tests, tears down test-ci: @set -e; \ + echo "๐Ÿ“ฆ Installing test dependencies..."; \ + uv sync --group dev; \ echo "๐Ÿš€ Starting infra (OpenSearch + Dashboards + Langflow)"; \ make infra; \ echo "โณ Waiting for OpenSearch..."; \ diff --git a/pyproject.toml b/pyproject.toml index 04200e93..8e816391 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,6 @@ dependencies = [ "pyjwt>=2.8.0", "python-multipart>=0.0.20", "starlette>=0.47.1", - "torch>=2.7.1", "uvicorn>=0.35.0", "boto3>=1.35.0", "psutil>=7.0.0", @@ -27,12 +26,15 @@ dependencies = [ "python-dotenv>=1.0.0", "textual-fspicker>=0.6.0", "structlog>=25.4.0", - "pytest>=8.0.0", - "pytest-asyncio>=0.21.0", - "pytest-mock>=3.12.0", - "pytest-cov>=4.0.0", ] +[project.optional-dependencies] +torch = ["torch", "torchvision"] +torch-cu128 = ["torch", "torchvision"] + +[dependency-groups] +dev = ["pytest>=8", "pytest-asyncio>=0.21.0", "pytest-mock>=3.12.0", "pytest-cov>=4.0.0"] + [project.scripts] openrag = "tui.main:run_tui" @@ -41,13 +43,20 @@ package = true [tool.uv.sources] torch = [ - { index = "pytorch-cu128", marker = "sys_platform == 'linux' and platform_machine == 'x86_64'" }, + { index = "pytorch-cu128", extra = "torch-cu128" }, + { index = "pytorch-cpu", extra = "torch" } ] torchvision = [ - { index = "pytorch-cu128", marker = "sys_platform == 'linux' and platform_machine == 'x86_64'" }, + { index = "pytorch-cu128", extra = "torch-cu128" }, + { index = "pytorch-cpu", extra = "torch" } ] [[tool.uv.index]] -name = "pytorch-cu128" -url = "https://download.pytorch.org/whl/cu128" +name = "pytorch-cu128" +url = "https://download.pytorch.org/whl/cu128" +explicit = true + +[[tool.uv.index]] +name = "pytorch-cpu" +url = "https://download.pytorch.org/whl/cpu" explicit = true From 364f24a2ca1f690ca221f47b96353a95f964102f Mon Sep 17 00:00:00 2001 From: phact Date: Fri, 12 Sep 2025 13:11:32 -0400 Subject: [PATCH 15/58] torch dep fix --- pyproject.toml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 8e816391..de2e562c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,12 +43,12 @@ package = true [tool.uv.sources] torch = [ - { index = "pytorch-cu128", extra = "torch-cu128" }, - { index = "pytorch-cpu", extra = "torch" } + { index = "pytorch-cu128", extra = "torch-cu128", marker = "sys_platform == 'linux'" }, + { index = "pytorch-cpu", extra = "torch", marker = "sys_platform != 'linux'" } ] torchvision = [ - { index = "pytorch-cu128", extra = "torch-cu128" }, - { index = "pytorch-cpu", extra = "torch" } + { index = "pytorch-cu128", extra = "torch-cu128", marker = "sys_platform == 'linux'" }, + { index = "pytorch-cpu", extra = "torch", marker = "sys_platform != 'linux'" } ] [[tool.uv.index]] From b5d0d23fbe5334fcacea5f931cc4d9a9308eca27 Mon Sep 17 00:00:00 2001 From: phact Date: Fri, 12 Sep 2025 13:40:14 -0400 Subject: [PATCH 16/58] ci cpu only --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index eeab5a12..2defe2bb 100644 --- a/Makefile +++ b/Makefile @@ -186,8 +186,8 @@ test-ci: @set -e; \ echo "๐Ÿ“ฆ Installing test dependencies..."; \ uv sync --group dev; \ - echo "๐Ÿš€ Starting infra (OpenSearch + Dashboards + Langflow)"; \ - make infra; \ + echo "๐Ÿš€ Starting infra (OpenSearch + Dashboards + Langflow) with CPU containers"; \ + docker compose -f docker-compose-cpu.yml up -d opensearch dashboards langflow; \ echo "โณ Waiting for OpenSearch..."; \ for i in $$(seq 1 60); do \ curl -k -s https://localhost:9200 -u admin:$${OPENSEARCH_PASSWORD} >/dev/null 2>&1 && break || sleep 2; \ From f0b608e776ef4c75b1d8980e8262f7b6c32bd7a6 Mon Sep 17 00:00:00 2001 From: phact Date: Fri, 12 Sep 2025 14:11:10 -0400 Subject: [PATCH 17/58] add openai key to workflow --- .github/workflows/test-integration.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/test-integration.yml b/.github/workflows/test-integration.yml index 8b1a0b74..0ff6b8ff 100644 --- a/.github/workflows/test-integration.yml +++ b/.github/workflows/test-integration.yml @@ -12,6 +12,7 @@ jobs: env: # Prefer repository/environment variable first, then secret, then a sane fallback OPENSEARCH_PASSWORD: ${{ vars.OPENSEARCH_PASSWORD || secrets.OPENSEARCH_PASSWORD || 'OpenRag#2025!' }} + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} steps: - run: df -h From 1549161a336a1dd645eb926cd43410cc60066b37 Mon Sep 17 00:00:00 2001 From: phact Date: Fri, 12 Sep 2025 14:35:42 -0400 Subject: [PATCH 18/58] genrate keys --- tests/conftest.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index 2edf3d65..87722481 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -17,6 +17,7 @@ os.environ.pop('GOOGLE_OAUTH_CLIENT_SECRET', None) from src.config.settings import clients from src.session_manager import SessionManager +from src.main import generate_jwt_keys @pytest.fixture(scope="session") @@ -42,6 +43,8 @@ async def opensearch_client(): @pytest.fixture def session_manager(): """Session manager for testing.""" + # Generate RSA keys before creating SessionManager + generate_jwt_keys() return SessionManager("test-secret-key") From 2210f6ac7365f97d2dc3a97d84311fb31d086c17 Mon Sep 17 00:00:00 2001 From: phact Date: Fri, 12 Sep 2025 15:32:09 -0400 Subject: [PATCH 19/58] debug keys dir --- .github/workflows/test-integration.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/test-integration.yml b/.github/workflows/test-integration.yml index 0ff6b8ff..b883b747 100644 --- a/.github/workflows/test-integration.yml +++ b/.github/workflows/test-integration.yml @@ -36,6 +36,13 @@ jobs: - name: Install dependencies run: uv sync --group dev + - name: Debug keys directory + run: | + ls -la keys/ || echo "keys dir doesn't exist" + whoami + pwd + id + - name: Run integration tests env: OPENSEARCH_HOST: localhost From dd6886aec6bb30ad2c8b553d21873ea37abf2844 Mon Sep 17 00:00:00 2001 From: phact Date: Fri, 12 Sep 2025 15:40:31 -0400 Subject: [PATCH 20/58] debug keys --- .github/workflows/test-integration.yml | 7 ------- src/main.py | 20 +++++++++++++++++--- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/.github/workflows/test-integration.yml b/.github/workflows/test-integration.yml index b883b747..0ff6b8ff 100644 --- a/.github/workflows/test-integration.yml +++ b/.github/workflows/test-integration.yml @@ -36,13 +36,6 @@ jobs: - name: Install dependencies run: uv sync --group dev - - name: Debug keys directory - run: | - ls -la keys/ || echo "keys dir doesn't exist" - whoami - pwd - id - - name: Run integration tests env: OPENSEARCH_HOST: localhost diff --git a/src/main.py b/src/main.py index 873dd458..46b5fa7e 100644 --- a/src/main.py +++ b/src/main.py @@ -183,15 +183,19 @@ def generate_jwt_keys(): # Generate keys if they don't exist if not os.path.exists(private_key_path): try: + logger.info("Generating RSA keys", private_key_path=private_key_path, public_key_path=public_key_path) + # Generate private key - subprocess.run( + result = subprocess.run( ["openssl", "genrsa", "-out", private_key_path, "2048"], check=True, capture_output=True, + text=True, ) + logger.info("Private key generation completed", stdout=result.stdout, stderr=result.stderr) # Generate public key - subprocess.run( + result = subprocess.run( [ "openssl", "rsa", @@ -203,11 +207,21 @@ def generate_jwt_keys(): ], check=True, capture_output=True, + text=True, ) + logger.info("Public key generation completed", stdout=result.stdout, stderr=result.stderr) + + # Verify files were created and are readable + logger.info("Verifying generated keys") + logger.info("Private key exists", exists=os.path.exists(private_key_path)) + logger.info("Public key exists", exists=os.path.exists(public_key_path)) + if os.path.exists(private_key_path): + stat_info = os.stat(private_key_path) + logger.info("Private key permissions", mode=oct(stat_info.st_mode), uid=stat_info.st_uid, gid=stat_info.st_gid) logger.info("Generated RSA keys for JWT signing") except subprocess.CalledProcessError as e: - logger.error("Failed to generate RSA keys", error=str(e)) + logger.error("Failed to generate RSA keys", error=str(e), stdout=e.stdout, stderr=e.stderr) raise else: logger.info("RSA keys already exist, skipping generation") From ccd5be6bdca4066152fe3d66ccd71576455db0a0 Mon Sep 17 00:00:00 2001 From: phact Date: Mon, 15 Sep 2025 15:49:28 -0400 Subject: [PATCH 21/58] ls keys --- .github/workflows/test-integration.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/test-integration.yml b/.github/workflows/test-integration.yml index 0ff6b8ff..46bbe977 100644 --- a/.github/workflows/test-integration.yml +++ b/.github/workflows/test-integration.yml @@ -50,3 +50,5 @@ jobs: DISABLE_STARTUP_INGEST: "true" run: | make test-ci + echo "Keys directory after tests:" + ls -la keys/ || echo "No keys directory" From 8ee1011562721c4d5e6269174515285df6dc0799 Mon Sep 17 00:00:00 2001 From: phact Date: Tue, 7 Oct 2025 01:32:56 -0400 Subject: [PATCH 22/58] unnecessary arg --- src/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main.py b/src/main.py index a09d2488..732eee1f 100644 --- a/src/main.py +++ b/src/main.py @@ -131,7 +131,7 @@ async def configure_alerting_security(): # Don't fail startup if alerting config fails -async def _ensure_opensearch_index(self): +async def _ensure_opensearch_index(): """Ensure OpenSearch index exists when using traditional connector service.""" try: # Check if index already exists From 31e49106fa9aeaa53c63134fa3879739c7bf5151 Mon Sep 17 00:00:00 2001 From: phact Date: Tue, 7 Oct 2025 01:34:01 -0400 Subject: [PATCH 23/58] dotenv override=False --- src/config/settings.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/config/settings.py b/src/config/settings.py index 6f55520d..d5a0bcac 100644 --- a/src/config/settings.py +++ b/src/config/settings.py @@ -13,8 +13,8 @@ from utils.container_utils import get_container_host from utils.document_processing import create_document_converter from utils.logging_config import get_logger -load_dotenv() -load_dotenv("../") +load_dotenv(override=False) +load_dotenv("../", override=False) logger = get_logger(__name__) From 65590f2a60a432878f5222fbb9b6bc7aaac01d50 Mon Sep 17 00:00:00 2001 From: phact Date: Tue, 7 Oct 2025 01:34:17 -0400 Subject: [PATCH 24/58] test-ci makefile with docling-serve --- Makefile | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index c24cce0b..47d61705 100644 --- a/Makefile +++ b/Makefile @@ -192,19 +192,26 @@ test-integration: # CI-friendly integration test target: brings up infra, waits, runs tests, tears down test-ci: @set -e; \ - echo "๐Ÿ“ฆ Installing test dependencies..."; \ + echo "Installing test dependencies..."; \ uv sync --group dev; \ - echo "๐Ÿš€ Starting infra (OpenSearch + Dashboards + Langflow) with CPU containers"; \ + echo "Starting infra (OpenSearch + Dashboards + Langflow) with CPU containers"; \ docker compose -f docker-compose-cpu.yml up -d opensearch dashboards langflow; \ - echo "โณ Waiting for OpenSearch..."; \ + echo "Starting docling-serve..."; \ + DOCLING_ENDPOINT=$$(uv run python scripts/docling_ctl.py start --port 5001 | grep "Endpoint:" | awk '{print $$2}'); \ + echo "Docling-serve started at $$DOCLING_ENDPOINT"; \ + echo "Waiting for OpenSearch..."; \ for i in $$(seq 1 60); do \ curl -k -s https://localhost:9200 -u admin:$${OPENSEARCH_PASSWORD} >/dev/null 2>&1 && break || sleep 2; \ done; \ - echo "โณ Waiting for Langflow..."; \ + echo "Waiting for Langflow..."; \ for i in $$(seq 1 60); do \ curl -s http://localhost:7860/ >/dev/null 2>&1 && break || sleep 2; \ done; \ - echo "๐Ÿงช Running integration tests"; \ + echo "Waiting for docling-serve at $$DOCLING_ENDPOINT..."; \ + for i in $$(seq 1 60); do \ + curl -s $${DOCLING_ENDPOINT}/health >/dev/null 2>&1 && break || sleep 2; \ + done; \ + echo "Running integration tests"; \ LOG_LEVEL=$${LOG_LEVEL:-DEBUG} \ GOOGLE_OAUTH_CLIENT_ID="" \ GOOGLE_OAUTH_CLIENT_SECRET="" \ @@ -212,7 +219,8 @@ test-ci: OPENSEARCH_USERNAME=admin OPENSEARCH_PASSWORD=$${OPENSEARCH_PASSWORD} \ DISABLE_STARTUP_INGEST=$${DISABLE_STARTUP_INGEST:-true} \ uv run pytest tests/integration -vv -s -o log_cli=true --log-cli-level=DEBUG; \ - echo "๐Ÿงน Tearing down infra"; \ + echo "Tearing down infra"; \ + uv run python scripts/docling_ctl.py stop || true; \ docker compose down -v || true lint: From adadb6ef0a7f99330e19c792f124b2f312f9de50 Mon Sep 17 00:00:00 2001 From: phact Date: Tue, 7 Oct 2025 01:34:38 -0400 Subject: [PATCH 25/58] docling-ctl for test-ci makefile --- scripts/docling_ctl.py | 91 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 scripts/docling_ctl.py diff --git a/scripts/docling_ctl.py b/scripts/docling_ctl.py new file mode 100644 index 00000000..8dc5c879 --- /dev/null +++ b/scripts/docling_ctl.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python3 +"""Helper script to control docling-serve using DoclingManager for CI/testing.""" + +import sys +import asyncio +import argparse +from pathlib import Path + +# Add src to path so we can import DoclingManager +sys.path.insert(0, str(Path(__file__).parent.parent / "src")) + +from tui.managers.docling_manager import DoclingManager + + +async def start_docling(port: int = 5001, host: str = None, enable_ui: bool = False): + """Start docling-serve.""" + manager = DoclingManager() + + if manager.is_running(): + print(f"Docling-serve is already running") + status = manager.get_status() + print(f"Endpoint: {status['endpoint']}") + return 0 + + host_msg = f"{host}:{port}" if host else f"auto-detected host:{port}" + print(f"Starting docling-serve on {host_msg}...") + success, message = await manager.start(port=port, host=host, enable_ui=enable_ui) + + if success: + print(f"{message}") + status = manager.get_status() + print(f"Endpoint: {status['endpoint']}") + print(f"PID: {status['pid']}") + return 0 + else: + print(f"{message}", file=sys.stderr) + return 1 + + +async def stop_docling(): + """Stop docling-serve.""" + manager = DoclingManager() + + if not manager.is_running(): + print("Docling-serve is not running") + return 0 + + print("Stopping docling-serve...") + success, message = await manager.stop() + + if success: + print(f"{message}") + return 0 + else: + print(f"{message}", file=sys.stderr) + return 1 + + +async def status_docling(): + """Get docling-serve status.""" + manager = DoclingManager() + status = manager.get_status() + + print(f"Status: {status['status']}") + if status['status'] == 'running': + print(f"Endpoint: {status['endpoint']}") + print(f"Docs: {status['docs_url']}") + print(f"PID: {status['pid']}") + + return 0 if status['status'] == 'running' else 1 + + +async def main(): + parser = argparse.ArgumentParser(description="Control docling-serve for CI/testing") + parser.add_argument("command", choices=["start", "stop", "status"], help="Command to run") + parser.add_argument("--port", type=int, default=5001, help="Port to run on (default: 5001)") + parser.add_argument("--host", default=None, help="Host to bind to (default: auto-detect for containers)") + parser.add_argument("--enable-ui", action="store_true", help="Enable UI") + + args = parser.parse_args() + + if args.command == "start": + return await start_docling(port=args.port, host=args.host if args.host else None, enable_ui=args.enable_ui) + elif args.command == "stop": + return await stop_docling() + elif args.command == "status": + return await status_docling() + + +if __name__ == "__main__": + sys.exit(asyncio.run(main())) From ad890ef2bcfd3bbec4e34fd655e72bf5f82993db Mon Sep 17 00:00:00 2001 From: phact Date: Tue, 7 Oct 2025 01:35:20 -0400 Subject: [PATCH 26/58] index creation text fix --- tests/conftest.py | 6 +++--- tests/integration/test_api_endpoints.py | 14 ++++++++++++-- tests/integration/test_startup_ingest.py | 5 +++++ 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 87722481..27a6f750 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -10,10 +10,10 @@ from dotenv import load_dotenv # Load environment variables load_dotenv() -# Force no-auth mode for testing by removing OAuth credentials +# Force no-auth mode for testing by setting OAuth credentials to empty strings # This ensures anonymous JWT tokens are created automatically -os.environ.pop('GOOGLE_OAUTH_CLIENT_ID', None) -os.environ.pop('GOOGLE_OAUTH_CLIENT_SECRET', None) +os.environ['GOOGLE_OAUTH_CLIENT_ID'] = '' +os.environ['GOOGLE_OAUTH_CLIENT_SECRET'] = '' from src.config.settings import clients from src.session_manager import SessionManager diff --git a/tests/integration/test_api_endpoints.py b/tests/integration/test_api_endpoints.py index 60810563..20f57d55 100644 --- a/tests/integration/test_api_endpoints.py +++ b/tests/integration/test_api_endpoints.py @@ -50,6 +50,7 @@ async def test_upload_and_search_endpoint(tmp_path: Path, disable_langflow_inges "src.api.router", "src.api.connector_router", "src.config.settings", + "src.auth_middleware", "src.main", ]: sys.modules.pop(mod, None) @@ -68,7 +69,11 @@ async def test_upload_and_search_endpoint(tmp_path: Path, disable_langflow_inges app = await create_app() # Manually run startup tasks since httpx ASGI transport here doesn't manage lifespan await startup_tasks(app.state.services) - + + # Ensure index exists for tests (startup_tasks only creates it if DISABLE_INGEST_WITH_LANGFLOW=True) + from src.main import _ensure_opensearch_index + await _ensure_opensearch_index() + # Verify index is truly empty after startup try: count_response = await clients.opensearch.count(index=INDEX_NAME) @@ -159,6 +164,7 @@ async def test_router_upload_ingest_traditional(tmp_path: Path, disable_langflow "src.api.router", "src.api.connector_router", "src.config.settings", + "src.auth_middleware", "src.main", ]: sys.modules.pop(mod, None) @@ -176,7 +182,11 @@ async def test_router_upload_ingest_traditional(tmp_path: Path, disable_langflow app = await create_app() await startup_tasks(app.state.services) - + + # Ensure index exists for tests (startup_tasks only creates it if DISABLE_INGEST_WITH_LANGFLOW=True) + from src.main import _ensure_opensearch_index + await _ensure_opensearch_index() + # Verify index is truly empty after startup try: count_response = await clients.opensearch.count(index=INDEX_NAME) diff --git a/tests/integration/test_startup_ingest.py b/tests/integration/test_startup_ingest.py index 436c4d28..b2243b33 100644 --- a/tests/integration/test_startup_ingest.py +++ b/tests/integration/test_startup_ingest.py @@ -51,6 +51,7 @@ async def test_startup_ingest_creates_task(disable_langflow_ingest: bool): "src.api.router", "src.api.connector_router", "src.config.settings", + "src.auth_middleware", "src.main", ]: sys.modules.pop(mod, None) @@ -69,6 +70,10 @@ async def test_startup_ingest_creates_task(disable_langflow_ingest: bool): # Trigger startup tasks explicitly await startup_tasks(app.state.services) + # Ensure index exists for tests (startup_tasks only creates it if DISABLE_INGEST_WITH_LANGFLOW=True) + from src.main import _ensure_opensearch_index + await _ensure_opensearch_index() + transport = httpx.ASGITransport(app=app) try: async with httpx.AsyncClient(transport=transport, base_url="http://testserver") as client: From 330b16ae06e9e6e9ed35f5c89c8980e9a9b0bd92 Mon Sep 17 00:00:00 2001 From: phact Date: Tue, 7 Oct 2025 02:00:57 -0400 Subject: [PATCH 27/58] preserve file name for upload --- src/services/document_service.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/services/document_service.py b/src/services/document_service.py index 5204ea0e..d596fb25 100644 --- a/src/services/document_service.py +++ b/src/services/document_service.py @@ -126,7 +126,11 @@ class DocumentService: from utils.file_utils import auto_cleanup_tempfile import os - with auto_cleanup_tempfile() as tmp_path: + # Preserve file extension for docling format detection + filename = upload_file.filename or "uploaded" + suffix = os.path.splitext(filename)[1] or "" + + with auto_cleanup_tempfile(suffix=suffix) as tmp_path: # Stream upload file to temporary file file_size = 0 with open(tmp_path, 'wb') as tmp_file: From 5e48d7b791b88dc5bec3312b16d1f1e598d0ccc4 Mon Sep 17 00:00:00 2001 From: phact Date: Tue, 7 Oct 2025 02:07:06 -0400 Subject: [PATCH 28/58] trace logging --- src/auth_middleware.py | 4 ++-- src/config/settings.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/auth_middleware.py b/src/auth_middleware.py index 44d1b2f0..45333c2f 100644 --- a/src/auth_middleware.py +++ b/src/auth_middleware.py @@ -28,7 +28,7 @@ def require_auth(session_manager): async def wrapper(request: Request): # In no-auth mode, bypass authentication entirely if is_no_auth_mode(): - logger.debug("No-auth mode: Creating anonymous user") + logger.trace("No-auth mode: Creating anonymous user") # Create an anonymous user object so endpoints don't break from session_manager import User from datetime import datetime @@ -36,7 +36,7 @@ def require_auth(session_manager): from session_manager import AnonymousUser request.state.user = AnonymousUser() request.state.jwt_token = None # No JWT in no-auth mode - logger.debug("Set user_id=anonymous, jwt_token=None") + logger.trace("Set user_id=anonymous, jwt_token=None") return await handler(request) user = get_current_user(request, session_manager) diff --git a/src/config/settings.py b/src/config/settings.py index d5a0bcac..6e4581dd 100644 --- a/src/config/settings.py +++ b/src/config/settings.py @@ -61,7 +61,7 @@ DISABLE_INGEST_WITH_LANGFLOW = os.getenv( def is_no_auth_mode(): """Check if we're running in no-auth mode (OAuth credentials missing)""" result = not (GOOGLE_OAUTH_CLIENT_ID and GOOGLE_OAUTH_CLIENT_SECRET) - logger.debug( + logger.trace( "Checking auth mode", no_auth_mode=result, has_client_id=GOOGLE_OAUTH_CLIENT_ID is not None, From 13c33fca8f72710fad1af53b835a156a746bba29 Mon Sep 17 00:00:00 2001 From: phact Date: Tue, 7 Oct 2025 02:11:51 -0400 Subject: [PATCH 29/58] remove logging --- src/auth_middleware.py | 2 -- src/config/settings.py | 6 ------ 2 files changed, 8 deletions(-) diff --git a/src/auth_middleware.py b/src/auth_middleware.py index 45333c2f..1bc6cf04 100644 --- a/src/auth_middleware.py +++ b/src/auth_middleware.py @@ -28,7 +28,6 @@ def require_auth(session_manager): async def wrapper(request: Request): # In no-auth mode, bypass authentication entirely if is_no_auth_mode(): - logger.trace("No-auth mode: Creating anonymous user") # Create an anonymous user object so endpoints don't break from session_manager import User from datetime import datetime @@ -36,7 +35,6 @@ def require_auth(session_manager): from session_manager import AnonymousUser request.state.user = AnonymousUser() request.state.jwt_token = None # No JWT in no-auth mode - logger.trace("Set user_id=anonymous, jwt_token=None") return await handler(request) user = get_current_user(request, session_manager) diff --git a/src/config/settings.py b/src/config/settings.py index 6e4581dd..598ccfb2 100644 --- a/src/config/settings.py +++ b/src/config/settings.py @@ -61,12 +61,6 @@ DISABLE_INGEST_WITH_LANGFLOW = os.getenv( def is_no_auth_mode(): """Check if we're running in no-auth mode (OAuth credentials missing)""" result = not (GOOGLE_OAUTH_CLIENT_ID and GOOGLE_OAUTH_CLIENT_SECRET) - logger.trace( - "Checking auth mode", - no_auth_mode=result, - has_client_id=GOOGLE_OAUTH_CLIENT_ID is not None, - has_client_secret=GOOGLE_OAUTH_CLIENT_SECRET is not None, - ) return result From 3efcbfd36476094400fadf8fcf2f12d901ed8418 Mon Sep 17 00:00:00 2001 From: phact Date: Tue, 7 Oct 2025 03:56:02 -0400 Subject: [PATCH 30/58] fix tests --- tests/integration/test_api_endpoints.py | 48 +++- uv.lock | 368 +++++++++++++++++++----- 2 files changed, 337 insertions(+), 79 deletions(-) diff --git a/tests/integration/test_api_endpoints.py b/tests/integration/test_api_endpoints.py index 20f57d55..fa36dc8b 100644 --- a/tests/integration/test_api_endpoints.py +++ b/tests/integration/test_api_endpoints.py @@ -18,14 +18,20 @@ async def wait_for_service_ready(client: httpx.AsyncClient, timeout_s: float = 3 while asyncio.get_event_loop().time() < deadline: try: r1 = await client.get("/auth/me") + if r1.status_code in (401, 403): + raise AssertionError(f"/auth/me returned {r1.status_code}: {r1.text}") if r1.status_code != 200: await asyncio.sleep(0.5) continue # match_all readiness probe; no embeddings r2 = await client.post("/search", json={"query": "*", "limit": 0}) + if r2.status_code in (401, 403): + raise AssertionError(f"/search returned {r2.status_code}: {r2.text}") if r2.status_code == 200: return last_err = r2.text + except AssertionError: + raise except Exception as e: last_err = str(e) await asyncio.sleep(0.5) @@ -48,14 +54,24 @@ async def test_upload_and_search_endpoint(tmp_path: Path, disable_langflow_inges # Clear cached modules so settings pick up env and router sees new flag for mod in [ "src.api.router", + "api.router", # Also clear the non-src path "src.api.connector_router", + "api.connector_router", "src.config.settings", + "config.settings", "src.auth_middleware", + "auth_middleware", "src.main", + "api", # Clear the api package itself + "src.api", ]: sys.modules.pop(mod, None) from src.main import create_app, startup_tasks - from src.config.settings import clients, INDEX_NAME + import src.api.router as upload_router + from src.config.settings import clients, INDEX_NAME, DISABLE_INGEST_WITH_LANGFLOW + + # Verify settings loaded correctly + print(f"Settings DISABLE_INGEST_WITH_LANGFLOW: {DISABLE_INGEST_WITH_LANGFLOW}") # Ensure a clean index before startup await clients.initialize() @@ -108,9 +124,9 @@ async def test_upload_and_search_endpoint(tmp_path: Path, disable_langflow_inges } upload_resp = await client.post("/upload", files=files) body = upload_resp.json() - # Router now returns 201 + task_id (async) regardless of mode assert upload_resp.status_code == 201, upload_resp.text - assert isinstance(body.get("task_id"), str) + assert body.get("status") in {"indexed", "unchanged"} + assert isinstance(body.get("id"), str) # Poll search for the specific content until it's indexed async def _wait_for_indexed(timeout_s: float = 30.0): @@ -162,14 +178,24 @@ async def test_router_upload_ingest_traditional(tmp_path: Path, disable_langflow import sys for mod in [ "src.api.router", + "api.router", # Also clear the non-src path "src.api.connector_router", + "api.connector_router", "src.config.settings", + "config.settings", "src.auth_middleware", + "auth_middleware", "src.main", + "api", # Clear the api package itself + "src.api", ]: sys.modules.pop(mod, None) from src.main import create_app, startup_tasks - from src.config.settings import clients, INDEX_NAME + import src.api.router as upload_router + from src.config.settings import clients, INDEX_NAME, DISABLE_INGEST_WITH_LANGFLOW + + # Verify settings loaded correctly + print(f"Settings DISABLE_INGEST_WITH_LANGFLOW: {DISABLE_INGEST_WITH_LANGFLOW}") # Ensure a clean index before startup await clients.initialize() @@ -211,10 +237,18 @@ async def test_router_upload_ingest_traditional(tmp_path: Path, disable_langflow ) } - resp = await client.post("/upload", files=files) + resp = await client.post("/router/upload_ingest", files=files) data = resp.json() - assert resp.status_code == 201, resp.text - assert isinstance(data.get("task_id"), str) + + print(f"data: {data}") + if disable_langflow_ingest: + assert resp.status_code == 201 or resp.status_code == 202, resp.text + assert data.get("status") in {"indexed", "unchanged"} + assert isinstance(data.get("id"), str) + else: + assert resp.status_code == 201 or resp.status_code == 202, resp.text + assert isinstance(data.get("task_id"), str) + assert data.get("file_count") == 1 finally: from src.config.settings import clients try: diff --git a/uv.lock b/uv.lock index c9bc6714..fd5164cb 100644 --- a/uv.lock +++ b/uv.lock @@ -5,7 +5,8 @@ resolution-markers = [ "sys_platform == 'darwin'", "platform_machine == 'aarch64' and sys_platform == 'linux'", "platform_machine == 'x86_64' and sys_platform == 'linux'", - "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')", + "sys_platform != 'darwin' and sys_platform != 'linux'", + "platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux'", ] [[package]] @@ -20,8 +21,9 @@ dependencies = [ { name = "psutil" }, { name = "pyyaml" }, { name = "safetensors" }, - { name = "torch", version = "2.7.1+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "torch", version = "2.8.0", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine != 'x86_64' or sys_platform != 'linux'" }, + { name = "torch", version = "2.7.1+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform == 'linux'" }, + { name = "torch", version = "2.8.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" }, + { name = "torch", version = "2.8.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/b1/72/ff3961c19ee395c3d30ac630ee77bfb0e1b46b87edc504d4f83bb4a89705/accelerate-1.10.1.tar.gz", hash = "sha256:3dea89e433420e4bfac0369cae7e36dcd6a56adfcfd38cdda145c6225eab5df8", size = 392446, upload-time = "2025-08-25T13:57:06.21Z" } wheels = [ @@ -293,7 +295,8 @@ source = { registry = "https://pypi.org/simple" } resolution-markers = [ "platform_machine == 'aarch64' and sys_platform == 'linux'", "platform_machine == 'x86_64' and sys_platform == 'linux'", - "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')", + "sys_platform != 'darwin' and sys_platform != 'linux'", + "platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux'", ] dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, @@ -312,6 +315,67 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, ] +[[package]] +name = "coverage" +version = "7.10.7" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/51/26/d22c300112504f5f9a9fd2297ce33c35f3d353e4aeb987c8419453b2a7c2/coverage-7.10.7.tar.gz", hash = "sha256:f4ab143ab113be368a3e9b795f9cd7906c5ef407d6173fe9675a902e1fffc239", size = 827704, upload-time = "2025-09-21T20:03:56.815Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9a/94/b765c1abcb613d103b64fcf10395f54d69b0ef8be6a0dd9c524384892cc7/coverage-7.10.7-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:981a651f543f2854abd3b5fcb3263aac581b18209be49863ba575de6edf4c14d", size = 218320, upload-time = "2025-09-21T20:01:56.629Z" }, + { url = "https://files.pythonhosted.org/packages/72/4f/732fff31c119bb73b35236dd333030f32c4bfe909f445b423e6c7594f9a2/coverage-7.10.7-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:73ab1601f84dc804f7812dc297e93cd99381162da39c47040a827d4e8dafe63b", size = 218575, upload-time = "2025-09-21T20:01:58.203Z" }, + { url = "https://files.pythonhosted.org/packages/87/02/ae7e0af4b674be47566707777db1aa375474f02a1d64b9323e5813a6cdd5/coverage-7.10.7-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:a8b6f03672aa6734e700bbcd65ff050fd19cddfec4b031cc8cf1c6967de5a68e", size = 249568, upload-time = "2025-09-21T20:01:59.748Z" }, + { url = "https://files.pythonhosted.org/packages/a2/77/8c6d22bf61921a59bce5471c2f1f7ac30cd4ac50aadde72b8c48d5727902/coverage-7.10.7-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:10b6ba00ab1132a0ce4428ff68cf50a25efd6840a42cdf4239c9b99aad83be8b", size = 252174, upload-time = "2025-09-21T20:02:01.192Z" }, + { url = "https://files.pythonhosted.org/packages/b1/20/b6ea4f69bbb52dac0aebd62157ba6a9dddbfe664f5af8122dac296c3ee15/coverage-7.10.7-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c79124f70465a150e89340de5963f936ee97097d2ef76c869708c4248c63ca49", size = 253447, upload-time = "2025-09-21T20:02:02.701Z" }, + { url = "https://files.pythonhosted.org/packages/f9/28/4831523ba483a7f90f7b259d2018fef02cb4d5b90bc7c1505d6e5a84883c/coverage-7.10.7-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:69212fbccdbd5b0e39eac4067e20a4a5256609e209547d86f740d68ad4f04911", size = 249779, upload-time = "2025-09-21T20:02:04.185Z" }, + { url = "https://files.pythonhosted.org/packages/a7/9f/4331142bc98c10ca6436d2d620c3e165f31e6c58d43479985afce6f3191c/coverage-7.10.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7ea7c6c9d0d286d04ed3541747e6597cbe4971f22648b68248f7ddcd329207f0", size = 251604, upload-time = "2025-09-21T20:02:06.034Z" }, + { url = "https://files.pythonhosted.org/packages/ce/60/bda83b96602036b77ecf34e6393a3836365481b69f7ed7079ab85048202b/coverage-7.10.7-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b9be91986841a75042b3e3243d0b3cb0b2434252b977baaf0cd56e960fe1e46f", size = 249497, upload-time = "2025-09-21T20:02:07.619Z" }, + { url = "https://files.pythonhosted.org/packages/5f/af/152633ff35b2af63977edd835d8e6430f0caef27d171edf2fc76c270ef31/coverage-7.10.7-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:b281d5eca50189325cfe1f365fafade89b14b4a78d9b40b05ddd1fc7d2a10a9c", size = 249350, upload-time = "2025-09-21T20:02:10.34Z" }, + { url = "https://files.pythonhosted.org/packages/9d/71/d92105d122bd21cebba877228990e1646d862e34a98bb3374d3fece5a794/coverage-7.10.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:99e4aa63097ab1118e75a848a28e40d68b08a5e19ce587891ab7fd04475e780f", size = 251111, upload-time = "2025-09-21T20:02:12.122Z" }, + { url = "https://files.pythonhosted.org/packages/a2/9e/9fdb08f4bf476c912f0c3ca292e019aab6712c93c9344a1653986c3fd305/coverage-7.10.7-cp313-cp313-win32.whl", hash = "sha256:dc7c389dce432500273eaf48f410b37886be9208b2dd5710aaf7c57fd442c698", size = 220746, upload-time = "2025-09-21T20:02:13.919Z" }, + { url = "https://files.pythonhosted.org/packages/b1/b1/a75fd25df44eab52d1931e89980d1ada46824c7a3210be0d3c88a44aaa99/coverage-7.10.7-cp313-cp313-win_amd64.whl", hash = "sha256:cac0fdca17b036af3881a9d2729a850b76553f3f716ccb0360ad4dbc06b3b843", size = 221541, upload-time = "2025-09-21T20:02:15.57Z" }, + { url = "https://files.pythonhosted.org/packages/14/3a/d720d7c989562a6e9a14b2c9f5f2876bdb38e9367126d118495b89c99c37/coverage-7.10.7-cp313-cp313-win_arm64.whl", hash = "sha256:4b6f236edf6e2f9ae8fcd1332da4e791c1b6ba0dc16a2dc94590ceccb482e546", size = 220170, upload-time = "2025-09-21T20:02:17.395Z" }, + { url = "https://files.pythonhosted.org/packages/bb/22/e04514bf2a735d8b0add31d2b4ab636fc02370730787c576bb995390d2d5/coverage-7.10.7-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:a0ec07fd264d0745ee396b666d47cef20875f4ff2375d7c4f58235886cc1ef0c", size = 219029, upload-time = "2025-09-21T20:02:18.936Z" }, + { url = "https://files.pythonhosted.org/packages/11/0b/91128e099035ece15da3445d9015e4b4153a6059403452d324cbb0a575fa/coverage-7.10.7-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:dd5e856ebb7bfb7672b0086846db5afb4567a7b9714b8a0ebafd211ec7ce6a15", size = 219259, upload-time = "2025-09-21T20:02:20.44Z" }, + { url = "https://files.pythonhosted.org/packages/8b/51/66420081e72801536a091a0c8f8c1f88a5c4bf7b9b1bdc6222c7afe6dc9b/coverage-7.10.7-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:f57b2a3c8353d3e04acf75b3fed57ba41f5c0646bbf1d10c7c282291c97936b4", size = 260592, upload-time = "2025-09-21T20:02:22.313Z" }, + { url = "https://files.pythonhosted.org/packages/5d/22/9b8d458c2881b22df3db5bb3e7369e63d527d986decb6c11a591ba2364f7/coverage-7.10.7-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:1ef2319dd15a0b009667301a3f84452a4dc6fddfd06b0c5c53ea472d3989fbf0", size = 262768, upload-time = "2025-09-21T20:02:24.287Z" }, + { url = "https://files.pythonhosted.org/packages/f7/08/16bee2c433e60913c610ea200b276e8eeef084b0d200bdcff69920bd5828/coverage-7.10.7-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:83082a57783239717ceb0ad584de3c69cf581b2a95ed6bf81ea66034f00401c0", size = 264995, upload-time = "2025-09-21T20:02:26.133Z" }, + { url = "https://files.pythonhosted.org/packages/20/9d/e53eb9771d154859b084b90201e5221bca7674ba449a17c101a5031d4054/coverage-7.10.7-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:50aa94fb1fb9a397eaa19c0d5ec15a5edd03a47bf1a3a6111a16b36e190cff65", size = 259546, upload-time = "2025-09-21T20:02:27.716Z" }, + { url = "https://files.pythonhosted.org/packages/ad/b0/69bc7050f8d4e56a89fb550a1577d5d0d1db2278106f6f626464067b3817/coverage-7.10.7-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:2120043f147bebb41c85b97ac45dd173595ff14f2a584f2963891cbcc3091541", size = 262544, upload-time = "2025-09-21T20:02:29.216Z" }, + { url = "https://files.pythonhosted.org/packages/ef/4b/2514b060dbd1bc0aaf23b852c14bb5818f244c664cb16517feff6bb3a5ab/coverage-7.10.7-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:2fafd773231dd0378fdba66d339f84904a8e57a262f583530f4f156ab83863e6", size = 260308, upload-time = "2025-09-21T20:02:31.226Z" }, + { url = "https://files.pythonhosted.org/packages/54/78/7ba2175007c246d75e496f64c06e94122bdb914790a1285d627a918bd271/coverage-7.10.7-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:0b944ee8459f515f28b851728ad224fa2d068f1513ef6b7ff1efafeb2185f999", size = 258920, upload-time = "2025-09-21T20:02:32.823Z" }, + { url = "https://files.pythonhosted.org/packages/c0/b3/fac9f7abbc841409b9a410309d73bfa6cfb2e51c3fada738cb607ce174f8/coverage-7.10.7-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4b583b97ab2e3efe1b3e75248a9b333bd3f8b0b1b8e5b45578e05e5850dfb2c2", size = 261434, upload-time = "2025-09-21T20:02:34.86Z" }, + { url = "https://files.pythonhosted.org/packages/ee/51/a03bec00d37faaa891b3ff7387192cef20f01604e5283a5fabc95346befa/coverage-7.10.7-cp313-cp313t-win32.whl", hash = "sha256:2a78cd46550081a7909b3329e2266204d584866e8d97b898cd7fb5ac8d888b1a", size = 221403, upload-time = "2025-09-21T20:02:37.034Z" }, + { url = "https://files.pythonhosted.org/packages/53/22/3cf25d614e64bf6d8e59c7c669b20d6d940bb337bdee5900b9ca41c820bb/coverage-7.10.7-cp313-cp313t-win_amd64.whl", hash = "sha256:33a5e6396ab684cb43dc7befa386258acb2d7fae7f67330ebb85ba4ea27938eb", size = 222469, upload-time = "2025-09-21T20:02:39.011Z" }, + { url = "https://files.pythonhosted.org/packages/49/a1/00164f6d30d8a01c3c9c48418a7a5be394de5349b421b9ee019f380df2a0/coverage-7.10.7-cp313-cp313t-win_arm64.whl", hash = "sha256:86b0e7308289ddde73d863b7683f596d8d21c7d8664ce1dee061d0bcf3fbb4bb", size = 220731, upload-time = "2025-09-21T20:02:40.939Z" }, + { url = "https://files.pythonhosted.org/packages/23/9c/5844ab4ca6a4dd97a1850e030a15ec7d292b5c5cb93082979225126e35dd/coverage-7.10.7-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:b06f260b16ead11643a5a9f955bd4b5fd76c1a4c6796aeade8520095b75de520", size = 218302, upload-time = "2025-09-21T20:02:42.527Z" }, + { url = "https://files.pythonhosted.org/packages/f0/89/673f6514b0961d1f0e20ddc242e9342f6da21eaba3489901b565c0689f34/coverage-7.10.7-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:212f8f2e0612778f09c55dd4872cb1f64a1f2b074393d139278ce902064d5b32", size = 218578, upload-time = "2025-09-21T20:02:44.468Z" }, + { url = "https://files.pythonhosted.org/packages/05/e8/261cae479e85232828fb17ad536765c88dd818c8470aca690b0ac6feeaa3/coverage-7.10.7-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:3445258bcded7d4aa630ab8296dea4d3f15a255588dd535f980c193ab6b95f3f", size = 249629, upload-time = "2025-09-21T20:02:46.503Z" }, + { url = "https://files.pythonhosted.org/packages/82/62/14ed6546d0207e6eda876434e3e8475a3e9adbe32110ce896c9e0c06bb9a/coverage-7.10.7-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:bb45474711ba385c46a0bfe696c695a929ae69ac636cda8f532be9e8c93d720a", size = 252162, upload-time = "2025-09-21T20:02:48.689Z" }, + { url = "https://files.pythonhosted.org/packages/ff/49/07f00db9ac6478e4358165a08fb41b469a1b053212e8a00cb02f0d27a05f/coverage-7.10.7-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:813922f35bd800dca9994c5971883cbc0d291128a5de6b167c7aa697fcf59360", size = 253517, upload-time = "2025-09-21T20:02:50.31Z" }, + { url = "https://files.pythonhosted.org/packages/a2/59/c5201c62dbf165dfbc91460f6dbbaa85a8b82cfa6131ac45d6c1bfb52deb/coverage-7.10.7-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:93c1b03552081b2a4423091d6fb3787265b8f86af404cff98d1b5342713bdd69", size = 249632, upload-time = "2025-09-21T20:02:51.971Z" }, + { url = "https://files.pythonhosted.org/packages/07/ae/5920097195291a51fb00b3a70b9bbd2edbfe3c84876a1762bd1ef1565ebc/coverage-7.10.7-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:cc87dd1b6eaf0b848eebb1c86469b9f72a1891cb42ac7adcfbce75eadb13dd14", size = 251520, upload-time = "2025-09-21T20:02:53.858Z" }, + { url = "https://files.pythonhosted.org/packages/b9/3c/a815dde77a2981f5743a60b63df31cb322c944843e57dbd579326625a413/coverage-7.10.7-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:39508ffda4f343c35f3236fe8d1a6634a51f4581226a1262769d7f970e73bffe", size = 249455, upload-time = "2025-09-21T20:02:55.807Z" }, + { url = "https://files.pythonhosted.org/packages/aa/99/f5cdd8421ea656abefb6c0ce92556709db2265c41e8f9fc6c8ae0f7824c9/coverage-7.10.7-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:925a1edf3d810537c5a3abe78ec5530160c5f9a26b1f4270b40e62cc79304a1e", size = 249287, upload-time = "2025-09-21T20:02:57.784Z" }, + { url = "https://files.pythonhosted.org/packages/c3/7a/e9a2da6a1fc5d007dd51fca083a663ab930a8c4d149c087732a5dbaa0029/coverage-7.10.7-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2c8b9a0636f94c43cd3576811e05b89aa9bc2d0a85137affc544ae5cb0e4bfbd", size = 250946, upload-time = "2025-09-21T20:02:59.431Z" }, + { url = "https://files.pythonhosted.org/packages/ef/5b/0b5799aa30380a949005a353715095d6d1da81927d6dbed5def2200a4e25/coverage-7.10.7-cp314-cp314-win32.whl", hash = "sha256:b7b8288eb7cdd268b0304632da8cb0bb93fadcfec2fe5712f7b9cc8f4d487be2", size = 221009, upload-time = "2025-09-21T20:03:01.324Z" }, + { url = "https://files.pythonhosted.org/packages/da/b0/e802fbb6eb746de006490abc9bb554b708918b6774b722bb3a0e6aa1b7de/coverage-7.10.7-cp314-cp314-win_amd64.whl", hash = "sha256:1ca6db7c8807fb9e755d0379ccc39017ce0a84dcd26d14b5a03b78563776f681", size = 221804, upload-time = "2025-09-21T20:03:03.4Z" }, + { url = "https://files.pythonhosted.org/packages/9e/e8/71d0c8e374e31f39e3389bb0bd19e527d46f00ea8571ec7ec8fd261d8b44/coverage-7.10.7-cp314-cp314-win_arm64.whl", hash = "sha256:097c1591f5af4496226d5783d036bf6fd6cd0cbc132e071b33861de756efb880", size = 220384, upload-time = "2025-09-21T20:03:05.111Z" }, + { url = "https://files.pythonhosted.org/packages/62/09/9a5608d319fa3eba7a2019addeacb8c746fb50872b57a724c9f79f146969/coverage-7.10.7-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:a62c6ef0d50e6de320c270ff91d9dd0a05e7250cac2a800b7784bae474506e63", size = 219047, upload-time = "2025-09-21T20:03:06.795Z" }, + { url = "https://files.pythonhosted.org/packages/f5/6f/f58d46f33db9f2e3647b2d0764704548c184e6f5e014bef528b7f979ef84/coverage-7.10.7-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:9fa6e4dd51fe15d8738708a973470f67a855ca50002294852e9571cdbd9433f2", size = 219266, upload-time = "2025-09-21T20:03:08.495Z" }, + { url = "https://files.pythonhosted.org/packages/74/5c/183ffc817ba68e0b443b8c934c8795553eb0c14573813415bd59941ee165/coverage-7.10.7-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:8fb190658865565c549b6b4706856d6a7b09302c797eb2cf8e7fe9dabb043f0d", size = 260767, upload-time = "2025-09-21T20:03:10.172Z" }, + { url = "https://files.pythonhosted.org/packages/0f/48/71a8abe9c1ad7e97548835e3cc1adbf361e743e9d60310c5f75c9e7bf847/coverage-7.10.7-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:affef7c76a9ef259187ef31599a9260330e0335a3011732c4b9effa01e1cd6e0", size = 262931, upload-time = "2025-09-21T20:03:11.861Z" }, + { url = "https://files.pythonhosted.org/packages/84/fd/193a8fb132acfc0a901f72020e54be5e48021e1575bb327d8ee1097a28fd/coverage-7.10.7-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6e16e07d85ca0cf8bafe5f5d23a0b850064e8e945d5677492b06bbe6f09cc699", size = 265186, upload-time = "2025-09-21T20:03:13.539Z" }, + { url = "https://files.pythonhosted.org/packages/b1/8f/74ecc30607dd95ad50e3034221113ccb1c6d4e8085cc761134782995daae/coverage-7.10.7-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:03ffc58aacdf65d2a82bbeb1ffe4d01ead4017a21bfd0454983b88ca73af94b9", size = 259470, upload-time = "2025-09-21T20:03:15.584Z" }, + { url = "https://files.pythonhosted.org/packages/0f/55/79ff53a769f20d71b07023ea115c9167c0bb56f281320520cf64c5298a96/coverage-7.10.7-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1b4fd784344d4e52647fd7857b2af5b3fbe6c239b0b5fa63e94eb67320770e0f", size = 262626, upload-time = "2025-09-21T20:03:17.673Z" }, + { url = "https://files.pythonhosted.org/packages/88/e2/dac66c140009b61ac3fc13af673a574b00c16efdf04f9b5c740703e953c0/coverage-7.10.7-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:0ebbaddb2c19b71912c6f2518e791aa8b9f054985a0769bdb3a53ebbc765c6a1", size = 260386, upload-time = "2025-09-21T20:03:19.36Z" }, + { url = "https://files.pythonhosted.org/packages/a2/f1/f48f645e3f33bb9ca8a496bc4a9671b52f2f353146233ebd7c1df6160440/coverage-7.10.7-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:a2d9a3b260cc1d1dbdb1c582e63ddcf5363426a1a68faa0f5da28d8ee3c722a0", size = 258852, upload-time = "2025-09-21T20:03:21.007Z" }, + { url = "https://files.pythonhosted.org/packages/bb/3b/8442618972c51a7affeead957995cfa8323c0c9bcf8fa5a027421f720ff4/coverage-7.10.7-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a3cc8638b2480865eaa3926d192e64ce6c51e3d29c849e09d5b4ad95efae5399", size = 261534, upload-time = "2025-09-21T20:03:23.12Z" }, + { url = "https://files.pythonhosted.org/packages/b2/dc/101f3fa3a45146db0cb03f5b4376e24c0aac818309da23e2de0c75295a91/coverage-7.10.7-cp314-cp314t-win32.whl", hash = "sha256:67f8c5cbcd3deb7a60b3345dffc89a961a484ed0af1f6f73de91705cc6e31235", size = 221784, upload-time = "2025-09-21T20:03:24.769Z" }, + { url = "https://files.pythonhosted.org/packages/4c/a1/74c51803fc70a8a40d7346660379e144be772bab4ac7bb6e6b905152345c/coverage-7.10.7-cp314-cp314t-win_amd64.whl", hash = "sha256:e1ed71194ef6dea7ed2d5cb5f7243d4bcd334bfb63e59878519be558078f848d", size = 222905, upload-time = "2025-09-21T20:03:26.93Z" }, + { url = "https://files.pythonhosted.org/packages/12/65/f116a6d2127df30bcafbceef0302d8a64ba87488bf6f73a6d8eebf060873/coverage-7.10.7-cp314-cp314t-win_arm64.whl", hash = "sha256:7fe650342addd8524ca63d77b2362b02345e5f1a093266787d210c70a50b471a", size = 220922, upload-time = "2025-09-21T20:03:28.672Z" }, + { url = "https://files.pythonhosted.org/packages/ec/16/114df1c291c22cac3b0c127a73e0af5c12ed7bbb6558d310429a0ae24023/coverage-7.10.7-py3-none-any.whl", hash = "sha256:f7941f6f2fe6dd6807a1208737b8a0cbcf1cc6d7b07d24998ad2d63590868260", size = 209952, upload-time = "2025-09-21T20:03:53.918Z" }, +] + [[package]] name = "cramjam" version = "2.11.0" @@ -456,7 +520,8 @@ source = { registry = "https://pypi.org/simple" } resolution-markers = [ "platform_machine == 'aarch64' and sys_platform == 'linux'", "platform_machine == 'x86_64' and sys_platform == 'linux'", - "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')", + "sys_platform != 'darwin' and sys_platform != 'linux'", + "platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux'", ] sdist = { url = "https://files.pythonhosted.org/packages/12/80/630b4b88364e9a8c8c5797f4602d0f76ef820909ee32f0bacb9f90654042/dill-0.4.0.tar.gz", hash = "sha256:0633f1d2df477324f53a895b02c901fb961bdbf65a17122586ea7019292cbcf0", size = 186976, upload-time = "2025-04-16T00:41:48.867Z" } wheels = [ @@ -570,10 +635,13 @@ dependencies = [ { name = "pydantic" }, { name = "rtree" }, { name = "safetensors", extra = ["torch"] }, - { name = "torch", version = "2.7.1+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "torch", version = "2.8.0", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine != 'x86_64' or sys_platform != 'linux'" }, - { name = "torchvision", version = "0.22.1", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "torchvision", version = "0.23.0", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine != 'x86_64' or sys_platform != 'linux'" }, + { name = "torch", version = "2.7.1+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform == 'linux'" }, + { name = "torch", version = "2.8.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" }, + { name = "torch", version = "2.8.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, + { name = "torchvision", version = "0.22.1", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, + { name = "torchvision", version = "0.22.1+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "platform_machine != 'aarch64' and sys_platform == 'linux'" }, + { name = "torchvision", version = "0.23.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" }, + { name = "torchvision", version = "0.23.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, { name = "tqdm" }, { name = "transformers" }, ] @@ -621,7 +689,8 @@ source = { registry = "https://pypi.org/simple" } resolution-markers = [ "platform_machine == 'aarch64' and sys_platform == 'linux'", "platform_machine == 'x86_64' and sys_platform == 'linux'", - "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')", + "sys_platform != 'darwin' and sys_platform != 'linux'", + "platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux'", ] dependencies = [ { name = "docling", marker = "sys_platform != 'darwin'" }, @@ -726,10 +795,13 @@ dependencies = [ { name = "scikit-image" }, { name = "scipy" }, { name = "shapely" }, - { name = "torch", version = "2.7.1+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "torch", version = "2.8.0", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine != 'x86_64' or sys_platform != 'linux'" }, - { name = "torchvision", version = "0.22.1", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "torchvision", version = "0.23.0", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine != 'x86_64' or sys_platform != 'linux'" }, + { name = "torch", version = "2.7.1+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform == 'linux'" }, + { name = "torch", version = "2.8.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" }, + { name = "torch", version = "2.8.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, + { name = "torchvision", version = "0.22.1", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, + { name = "torchvision", version = "0.22.1+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "platform_machine != 'aarch64' and sys_platform == 'linux'" }, + { name = "torchvision", version = "0.23.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" }, + { name = "torchvision", version = "0.23.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/bb/84/4a2cab0e6adde6a85e7ba543862e5fc0250c51f3ac721a078a55cdcff250/easyocr-1.7.2-py3-none-any.whl", hash = "sha256:5be12f9b0e595d443c9c3d10b0542074b50f0ec2d98b141a109cd961fd1c177c", size = 2870178, upload-time = "2024-09-24T11:34:43.554Z" }, @@ -945,7 +1017,8 @@ source = { registry = "https://pypi.org/simple" } resolution-markers = [ "platform_machine == 'aarch64' and sys_platform == 'linux'", "platform_machine == 'x86_64' and sys_platform == 'linux'", - "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')", + "sys_platform != 'darwin' and sys_platform != 'linux'", + "platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux'", ] sdist = { url = "https://files.pythonhosted.org/packages/00/f7/27f15d41f0ed38e8fcc488584b57e902b331da7f7c6dcda53721b15838fc/fsspec-2025.5.1.tar.gz", hash = "sha256:2e55e47a540b91843b755e83ded97c6e897fa0942b11490113f09e9c443c2475", size = 303033, upload-time = "2025-05-24T12:03:23.792Z" } wheels = [ @@ -1266,7 +1339,8 @@ source = { registry = "https://pypi.org/simple" } resolution-markers = [ "platform_machine == 'aarch64' and sys_platform == 'linux'", "platform_machine == 'x86_64' and sys_platform == 'linux'", - "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')", + "sys_platform != 'darwin' and sys_platform != 'linux'", + "platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux'", ] dependencies = [ { name = "filelock", marker = "sys_platform != 'darwin'" }, @@ -1339,6 +1413,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/20/b0/36bd937216ec521246249be3bf9855081de4c5e06a0c9b4219dbeda50373/importlib_metadata-8.7.0-py3-none-any.whl", hash = "sha256:e5dd1551894c77868a30651cef00984d50e1002d06942a7101d34870c5f02afd", size = 27656, upload-time = "2025-04-27T15:29:00.214Z" }, ] +[[package]] +name = "iniconfig" +version = "2.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f2/97/ebf4da567aa6827c909642694d71c9fcf53e5b504f2d96afea02718862f3/iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7", size = 4793, upload-time = "2025-03-19T20:09:59.721Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050, upload-time = "2025-03-19T20:10:01.071Z" }, +] + [[package]] name = "jinja2" version = "3.1.6" @@ -1962,7 +2045,8 @@ source = { registry = "https://pypi.org/simple" } resolution-markers = [ "platform_machine == 'aarch64' and sys_platform == 'linux'", "platform_machine == 'x86_64' and sys_platform == 'linux'", - "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')", + "sys_platform != 'darwin' and sys_platform != 'linux'", + "platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux'", ] dependencies = [ { name = "dill", version = "0.4.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'darwin'" }, @@ -2282,7 +2366,7 @@ wheels = [ [[package]] name = "openrag" -version = "0.1.14.dev3" +version = "0.1.15" source = { editable = "." } dependencies = [ { name = "agentd" }, @@ -2307,11 +2391,37 @@ dependencies = [ { name = "structlog" }, { name = "textual" }, { name = "textual-fspicker" }, - { name = "torch", version = "2.7.1+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "torch", version = "2.8.0", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine != 'x86_64' or sys_platform != 'linux'" }, { name = "uvicorn" }, ] +[package.optional-dependencies] +torch = [ + { name = "torch", version = "2.7.1+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform == 'linux'" }, + { name = "torch", version = "2.8.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" }, + { name = "torch", version = "2.8.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, + { name = "torchvision", version = "0.22.1", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, + { name = "torchvision", version = "0.22.1+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "platform_machine != 'aarch64' and sys_platform == 'linux'" }, + { name = "torchvision", version = "0.23.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" }, + { name = "torchvision", version = "0.23.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, +] +torch-cu128 = [ + { name = "torch", version = "2.7.1+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform == 'linux'" }, + { name = "torch", version = "2.8.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" }, + { name = "torch", version = "2.8.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, + { name = "torchvision", version = "0.22.1", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, + { name = "torchvision", version = "0.22.1+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "platform_machine != 'aarch64' and sys_platform == 'linux'" }, + { name = "torchvision", version = "0.23.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" }, + { name = "torchvision", version = "0.23.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, +] + +[package.dev-dependencies] +dev = [ + { name = "pytest" }, + { name = "pytest-asyncio" }, + { name = "pytest-cov" }, + { name = "pytest-mock" }, +] + [package.metadata] requires-dist = [ { name = "agentd", specifier = ">=0.2.2" }, @@ -2336,10 +2446,25 @@ requires-dist = [ { name = "structlog", specifier = ">=25.4.0" }, { name = "textual", specifier = ">=0.45.0" }, { name = "textual-fspicker", specifier = ">=0.6.0" }, - { name = "torch", marker = "platform_machine != 'x86_64' or sys_platform != 'linux'", specifier = ">=2.7.1" }, - { name = "torch", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'", specifier = ">=2.7.1", index = "https://download.pytorch.org/whl/cu128" }, + { name = "torch", marker = "sys_platform == 'linux' and extra == 'torch'" }, + { name = "torch", marker = "sys_platform == 'linux' and extra == 'torch-cu128'", index = "https://download.pytorch.org/whl/cu128", conflict = { package = "openrag", extra = "torch-cu128" } }, + { name = "torch", marker = "sys_platform != 'linux' and extra == 'torch'", index = "https://download.pytorch.org/whl/cpu", conflict = { package = "openrag", extra = "torch" } }, + { name = "torch", marker = "sys_platform != 'linux' and extra == 'torch-cu128'" }, + { name = "torchvision", marker = "sys_platform == 'linux' and extra == 'torch'" }, + { name = "torchvision", marker = "sys_platform == 'linux' and extra == 'torch-cu128'", index = "https://download.pytorch.org/whl/cu128", conflict = { package = "openrag", extra = "torch-cu128" } }, + { name = "torchvision", marker = "sys_platform != 'linux' and extra == 'torch'", index = "https://download.pytorch.org/whl/cpu", conflict = { package = "openrag", extra = "torch" } }, + { name = "torchvision", marker = "sys_platform != 'linux' and extra == 'torch-cu128'" }, { name = "uvicorn", specifier = ">=0.35.0" }, ] +provides-extras = ["torch", "torch-cu128"] + +[package.metadata.requires-dev] +dev = [ + { name = "pytest", specifier = ">=8" }, + { name = "pytest-asyncio", specifier = ">=0.21.0" }, + { name = "pytest-cov", specifier = ">=4.0.0" }, + { name = "pytest-mock", specifier = ">=3.12.0" }, +] [[package]] name = "opensearch-py" @@ -2836,6 +2961,60 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e1/6b/2706497c86e8d69fb76afe5ea857fe1794621aa0f3b1d863feb953fe0f22/pypdfium2-4.30.1-py3-none-win_arm64.whl", hash = "sha256:c2b6d63f6d425d9416c08d2511822b54b8e3ac38e639fc41164b1d75584b3a8c", size = 2814810, upload-time = "2024-12-19T19:28:09.857Z" }, ] +[[package]] +name = "pytest" +version = "8.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "iniconfig" }, + { name = "packaging" }, + { name = "pluggy" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a3/5c/00a0e072241553e1a7496d638deababa67c5058571567b92a7eaa258397c/pytest-8.4.2.tar.gz", hash = "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01", size = 1519618, upload-time = "2025-09-04T14:34:22.711Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a8/a4/20da314d277121d6534b3a980b29035dcd51e6744bd79075a6ce8fa4eb8d/pytest-8.4.2-py3-none-any.whl", hash = "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79", size = 365750, upload-time = "2025-09-04T14:34:20.226Z" }, +] + +[[package]] +name = "pytest-asyncio" +version = "1.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/42/86/9e3c5f48f7b7b638b216e4b9e645f54d199d7abbbab7a64a13b4e12ba10f/pytest_asyncio-1.2.0.tar.gz", hash = "sha256:c609a64a2a8768462d0c99811ddb8bd2583c33fd33cf7f21af1c142e824ffb57", size = 50119, upload-time = "2025-09-12T07:33:53.816Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/93/2fa34714b7a4ae72f2f8dad66ba17dd9a2c793220719e736dda28b7aec27/pytest_asyncio-1.2.0-py3-none-any.whl", hash = "sha256:8e17ae5e46d8e7efe51ab6494dd2010f4ca8dae51652aa3c8d55acf50bfb2e99", size = 15095, upload-time = "2025-09-12T07:33:52.639Z" }, +] + +[[package]] +name = "pytest-cov" +version = "7.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "coverage" }, + { name = "pluggy" }, + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5e/f7/c933acc76f5208b3b00089573cf6a2bc26dc80a8aece8f52bb7d6b1855ca/pytest_cov-7.0.0.tar.gz", hash = "sha256:33c97eda2e049a0c5298e91f519302a1334c26ac65c1a483d6206fd458361af1", size = 54328, upload-time = "2025-09-09T10:57:02.113Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ee/49/1377b49de7d0c1ce41292161ea0f721913fa8722c19fb9c1e3aa0367eecb/pytest_cov-7.0.0-py3-none-any.whl", hash = "sha256:3b8e9558b16cc1479da72058bdecf8073661c7f57f7d3c5f22a1c23507f2d861", size = 22424, upload-time = "2025-09-09T10:57:00.695Z" }, +] + +[[package]] +name = "pytest-mock" +version = "3.15.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/68/14/eb014d26be205d38ad5ad20d9a80f7d201472e08167f0bb4361e251084a9/pytest_mock-3.15.1.tar.gz", hash = "sha256:1849a238f6f396da19762269de72cb1814ab44416fa73a8686deac10b0d87a0f", size = 34036, upload-time = "2025-09-16T16:37:27.081Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/cc/06253936f4a7fa2e0f48dfe6d851d9c56df896a9ab09ac019d70b760619c/pytest_mock-3.15.1-py3-none-any.whl", hash = "sha256:0a25e2eb88fe5168d535041d09a4529a188176ae608a6d249ee65abc0949630d", size = 10095, upload-time = "2025-09-16T16:37:25.734Z" }, +] + [[package]] name = "python-bidi" version = "0.6.6" @@ -3261,8 +3440,9 @@ wheels = [ [package.optional-dependencies] torch = [ { name = "numpy" }, - { name = "torch", version = "2.7.1+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "torch", version = "2.8.0", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine != 'x86_64' or sys_platform != 'linux'" }, + { name = "torch", version = "2.7.1+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform == 'linux'" }, + { name = "torch", version = "2.8.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" }, + { name = "torch", version = "2.8.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, ] [[package]] @@ -3586,13 +3766,15 @@ name = "torch" version = "2.7.1+cu128" source = { registry = "https://download.pytorch.org/whl/cu128" } resolution-markers = [ + "platform_machine == 'aarch64' and sys_platform == 'linux'", "platform_machine == 'x86_64' and sys_platform == 'linux'", + "platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux'", ] dependencies = [ - { name = "filelock", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "fsspec", version = "2025.5.1", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "jinja2", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "networkx", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "filelock", marker = "sys_platform == 'linux'" }, + { name = "fsspec", version = "2025.5.1", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux'" }, + { name = "jinja2", marker = "sys_platform == 'linux'" }, + { name = "networkx", marker = "sys_platform == 'linux'" }, { name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, @@ -3607,86 +3789,128 @@ dependencies = [ { name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "nvidia-nvjitlink-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "setuptools", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "sympy", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "triton", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "typing-extensions", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "setuptools", marker = "sys_platform == 'linux'" }, + { name = "sympy", marker = "sys_platform == 'linux'" }, + { name = "triton", marker = "sys_platform == 'linux'" }, + { name = "typing-extensions", marker = "sys_platform == 'linux'" }, ] wheels = [ + { url = "https://download.pytorch.org/whl/cu128/torch-2.7.1%2Bcu128-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:d56d29a6ad7758ba5173cc2b0c51c93e126e2b0a918e874101dc66545283967f" }, { url = "https://download.pytorch.org/whl/cu128/torch-2.7.1%2Bcu128-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:9560425f9ea1af1791507e8ca70d5b9ecf62fed7ca226a95fcd58d0eb2cca78f" }, + { url = "https://download.pytorch.org/whl/cu128/torch-2.7.1%2Bcu128-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:f112465fdf42eb1297c6dddda1a8b7f411914428b704e1b8a47870c52e290909" }, { url = "https://download.pytorch.org/whl/cu128/torch-2.7.1%2Bcu128-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:c355db49c218ada70321d5c5c9bb3077312738b99113c8f3723ef596b554a7b9" }, ] [[package]] name = "torch" version = "2.8.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://download.pytorch.org/whl/cpu" } resolution-markers = [ "sys_platform == 'darwin'", - "platform_machine == 'aarch64' and sys_platform == 'linux'", - "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')", ] dependencies = [ - { name = "filelock", marker = "platform_machine != 'x86_64' or sys_platform != 'linux'" }, + { name = "filelock", marker = "sys_platform == 'darwin'" }, { name = "fsspec", version = "2025.3.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" }, - { name = "fsspec", version = "2025.5.1", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, - { name = "jinja2", marker = "platform_machine != 'x86_64' or sys_platform != 'linux'" }, - { name = "networkx", marker = "platform_machine != 'x86_64' or sys_platform != 'linux'" }, - { name = "setuptools", marker = "platform_machine != 'x86_64' or sys_platform != 'linux'" }, - { name = "sympy", marker = "platform_machine != 'x86_64' or sys_platform != 'linux'" }, - { name = "typing-extensions", marker = "platform_machine != 'x86_64' or sys_platform != 'linux'" }, + { name = "jinja2", marker = "sys_platform == 'darwin'" }, + { name = "networkx", marker = "sys_platform == 'darwin'" }, + { name = "setuptools", marker = "sys_platform == 'darwin'" }, + { name = "sympy", marker = "sys_platform == 'darwin'" }, + { name = "typing-extensions", marker = "sys_platform == 'darwin'" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/10/4e/469ced5a0603245d6a19a556e9053300033f9c5baccf43a3d25ba73e189e/torch-2.8.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:2b2f96814e0345f5a5aed9bf9734efa913678ed19caf6dc2cddb7930672d6128", size = 101936856, upload-time = "2025-08-06T14:54:01.526Z" }, - { url = "https://files.pythonhosted.org/packages/16/82/3948e54c01b2109238357c6f86242e6ecbf0c63a1af46906772902f82057/torch-2.8.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:65616ca8ec6f43245e1f5f296603e33923f4c30f93d65e103d9e50c25b35150b", size = 887922844, upload-time = "2025-08-06T14:55:50.78Z" }, - { url = "https://files.pythonhosted.org/packages/e3/54/941ea0a860f2717d86a811adf0c2cd01b3983bdd460d0803053c4e0b8649/torch-2.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:659df54119ae03e83a800addc125856effda88b016dfc54d9f65215c3975be16", size = 241330968, upload-time = "2025-08-06T14:54:45.293Z" }, - { url = "https://files.pythonhosted.org/packages/de/69/8b7b13bba430f5e21d77708b616f767683629fc4f8037564a177d20f90ed/torch-2.8.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:1a62a1ec4b0498930e2543535cf70b1bef8c777713de7ceb84cd79115f553767", size = 73915128, upload-time = "2025-08-06T14:54:34.769Z" }, - { url = "https://files.pythonhosted.org/packages/15/0e/8a800e093b7f7430dbaefa80075aee9158ec22e4c4fc3c1a66e4fb96cb4f/torch-2.8.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:83c13411a26fac3d101fe8035a6b0476ae606deb8688e904e796a3534c197def", size = 102020139, upload-time = "2025-08-06T14:54:39.047Z" }, - { url = "https://files.pythonhosted.org/packages/4a/15/5e488ca0bc6162c86a33b58642bc577c84ded17c7b72d97e49b5833e2d73/torch-2.8.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:8f0a9d617a66509ded240add3754e462430a6c1fc5589f86c17b433dd808f97a", size = 887990692, upload-time = "2025-08-06T14:56:18.286Z" }, - { url = "https://files.pythonhosted.org/packages/b4/a8/6a04e4b54472fc5dba7ca2341ab219e529f3c07b6941059fbf18dccac31f/torch-2.8.0-cp313-cp313t-win_amd64.whl", hash = "sha256:a7242b86f42be98ac674b88a4988643b9bc6145437ec8f048fea23f72feb5eca", size = 241603453, upload-time = "2025-08-06T14:55:22.945Z" }, - { url = "https://files.pythonhosted.org/packages/04/6e/650bb7f28f771af0cb791b02348db8b7f5f64f40f6829ee82aa6ce99aabe/torch-2.8.0-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:7b677e17f5a3e69fdef7eb3b9da72622f8d322692930297e4ccb52fefc6c8211", size = 73632395, upload-time = "2025-08-06T14:55:28.645Z" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:fbe2e149c5174ef90d29a5f84a554dfaf28e003cb4f61fa2c8c024c17ec7ca58" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:057efd30a6778d2ee5e2374cd63a63f63311aa6f33321e627c655df60abdd390" }, +] + +[[package]] +name = "torch" +version = "2.8.0+cpu" +source = { registry = "https://download.pytorch.org/whl/cpu" } +resolution-markers = [ + "sys_platform != 'darwin' and sys_platform != 'linux'", +] +dependencies = [ + { name = "filelock", marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, + { name = "fsspec", version = "2025.5.1", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, + { name = "jinja2", marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, + { name = "networkx", marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, + { name = "setuptools", marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, + { name = "sympy", marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, + { name = "typing-extensions", marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, +] +wheels = [ + { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313-win_amd64.whl", hash = "sha256:e1ee1b2346ade3ea90306dfbec7e8ff17bc220d344109d189ae09078333b0856" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313-win_arm64.whl", hash = "sha256:64c187345509f2b1bb334feed4666e2c781ca381874bde589182f81247e61f88" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313t-win_amd64.whl", hash = "sha256:6d93a7165419bc4b2b907e859ccab0dea5deeab261448ae9a5ec5431f14c0e64" }, ] [[package]] name = "torchvision" version = "0.22.1" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://download.pytorch.org/whl/cu128" } resolution-markers = [ - "platform_machine == 'x86_64' and sys_platform == 'linux'", + "platform_machine == 'aarch64' and sys_platform == 'linux'", ] dependencies = [ - { name = "numpy", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "pillow", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "torch", version = "2.7.1+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "numpy", marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, + { name = "pillow", marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, + { name = "torch", version = "2.7.1+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/8d/b0/3cffd6a285b5ffee3fe4a31caff49e350c98c5963854474d1c4f7a51dea5/torchvision-0.22.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:7ee682be589bb1a002b7704f06b8ec0b89e4b9068f48e79307d2c6e937a9fdf4", size = 7485894, upload-time = "2025-06-04T17:43:01.371Z" }, - { url = "https://files.pythonhosted.org/packages/94/8b/04c6b15f8c29b39f0679589753091cec8b192ab296d4fdaf9055544c4ec9/torchvision-0.22.1-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:ef46e065502f7300ad6abc98554131c35dc4c837b978d91306658f1a65c00baa", size = 7658543, upload-time = "2025-06-04T17:42:46.064Z" }, + { url = "https://download.pytorch.org/whl/cu128/torchvision-0.22.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:75f519ebe412ced95d727c71c30c68084cc6fd36347b88f338e88ff9d07a3ac8" }, + { url = "https://download.pytorch.org/whl/cu128/torchvision-0.22.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:f6565fd22e04e51f9600f34a3a20b120ee9f5a73161bfcb79c826225054aa44e" }, +] + +[[package]] +name = "torchvision" +version = "0.22.1+cu128" +source = { registry = "https://download.pytorch.org/whl/cu128" } +resolution-markers = [ + "platform_machine == 'x86_64' and sys_platform == 'linux'", + "platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux'", +] +dependencies = [ + { name = "numpy", marker = "platform_machine != 'aarch64' and sys_platform == 'linux'" }, + { name = "pillow", marker = "platform_machine != 'aarch64' and sys_platform == 'linux'" }, + { name = "torch", version = "2.7.1+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "platform_machine != 'aarch64' and sys_platform == 'linux'" }, +] +wheels = [ + { url = "https://download.pytorch.org/whl/cu128/torchvision-0.22.1%2Bcu128-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:bc4fef193917b51db6b409acd3ffdec9286d877baac0aee5dcfbb72592d00bfc" }, + { url = "https://download.pytorch.org/whl/cu128/torchvision-0.22.1%2Bcu128-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:02faf51fbf5070592768fa935327d13a484b745faef38b0fee01d85cfb35f5bc" }, ] [[package]] name = "torchvision" version = "0.23.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://download.pytorch.org/whl/cpu" } resolution-markers = [ "sys_platform == 'darwin'", - "platform_machine == 'aarch64' and sys_platform == 'linux'", - "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')", ] dependencies = [ - { name = "numpy", marker = "platform_machine != 'x86_64' or sys_platform != 'linux'" }, - { name = "pillow", marker = "platform_machine != 'x86_64' or sys_platform != 'linux'" }, - { name = "torch", version = "2.8.0", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine != 'x86_64' or sys_platform != 'linux'" }, + { name = "numpy", marker = "sys_platform == 'darwin'" }, + { name = "pillow", marker = "sys_platform == 'darwin'" }, + { name = "torch", version = "2.8.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/91/37/45a5b9407a7900f71d61b2b2f62db4b7c632debca397f205fdcacb502780/torchvision-0.23.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1c37e325e09a184b730c3ef51424f383ec5745378dc0eca244520aca29722600", size = 1856886, upload-time = "2025-08-06T14:58:05.491Z" }, - { url = "https://files.pythonhosted.org/packages/ac/da/a06c60fc84fc849377cf035d3b3e9a1c896d52dbad493b963c0f1cdd74d0/torchvision-0.23.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:2f7fd6c15f3697e80627b77934f77705f3bc0e98278b989b2655de01f6903e1d", size = 2353112, upload-time = "2025-08-06T14:58:26.265Z" }, - { url = "https://files.pythonhosted.org/packages/a0/27/5ce65ba5c9d3b7d2ccdd79892ab86a2f87ac2ca6638f04bb0280321f1a9c/torchvision-0.23.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:a76fafe113b2977be3a21bf78f115438c1f88631d7a87203acb3dd6ae55889e6", size = 8627658, upload-time = "2025-08-06T14:58:15.999Z" }, - { url = "https://files.pythonhosted.org/packages/1f/e4/028a27b60aa578a2fa99d9d7334ff1871bb17008693ea055a2fdee96da0d/torchvision-0.23.0-cp313-cp313-win_amd64.whl", hash = "sha256:07d069cb29691ff566e3b7f11f20d91044f079e1dbdc9d72e0655899a9b06938", size = 1600749, upload-time = "2025-08-06T14:58:10.719Z" }, - { url = "https://files.pythonhosted.org/packages/05/35/72f91ad9ac7c19a849dedf083d347dc1123f0adeb401f53974f84f1d04c8/torchvision-0.23.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:2df618e1143805a7673aaf82cb5720dd9112d4e771983156aaf2ffff692eebf9", size = 2047192, upload-time = "2025-08-06T14:58:11.813Z" }, - { url = "https://files.pythonhosted.org/packages/1d/9d/406cea60a9eb9882145bcd62a184ee61e823e8e1d550cdc3c3ea866a9445/torchvision-0.23.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:2a3299d2b1d5a7aed2d3b6ffb69c672ca8830671967eb1cee1497bacd82fe47b", size = 2359295, upload-time = "2025-08-06T14:58:17.469Z" }, - { url = "https://files.pythonhosted.org/packages/2b/f4/34662f71a70fa1e59de99772142f22257ca750de05ccb400b8d2e3809c1d/torchvision-0.23.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:76bc4c0b63d5114aa81281390f8472a12a6a35ce9906e67ea6044e5af4cab60c", size = 8800474, upload-time = "2025-08-06T14:58:22.53Z" }, - { url = "https://files.pythonhosted.org/packages/6e/f5/b5a2d841a8d228b5dbda6d524704408e19e7ca6b7bb0f24490e081da1fa1/torchvision-0.23.0-cp313-cp313t-win_amd64.whl", hash = "sha256:b9e2dabf0da9c8aa9ea241afb63a8f3e98489e706b22ac3f30416a1be377153b", size = 1527667, upload-time = "2025-08-06T14:58:14.446Z" }, + { url = "https://download.pytorch.org/whl/cpu/torchvision-0.23.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1c37e325e09a184b730c3ef51424f383ec5745378dc0eca244520aca29722600" }, + { url = "https://download.pytorch.org/whl/cpu/torchvision-0.23.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:2df618e1143805a7673aaf82cb5720dd9112d4e771983156aaf2ffff692eebf9" }, +] + +[[package]] +name = "torchvision" +version = "0.23.0+cpu" +source = { registry = "https://download.pytorch.org/whl/cpu" } +resolution-markers = [ + "sys_platform != 'darwin' and sys_platform != 'linux'", +] +dependencies = [ + { name = "numpy", marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, + { name = "pillow", marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, + { name = "torch", version = "2.8.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, +] +wheels = [ + { url = "https://download.pytorch.org/whl/cpu/torchvision-0.23.0%2Bcpu-cp313-cp313-win_amd64.whl", hash = "sha256:82928788025170c62e7df1120dcdc0cd175bfc31c08374613ce6d1a040bc0cda" }, + { url = "https://download.pytorch.org/whl/cpu/torchvision-0.23.0%2Bcpu-cp313-cp313t-win_amd64.whl", hash = "sha256:8d6a47e23d7896f0ef9aa7ea7179eb6324e82438aa66d19884c2020d0646b104" }, ] [[package]] @@ -3728,7 +3952,7 @@ name = "triton" version = "3.3.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "setuptools", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "setuptools", marker = "sys_platform == 'linux'" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/74/1f/dfb531f90a2d367d914adfee771babbd3f1a5b26c3f5fbc458dee21daa78/triton-3.3.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b89d846b5a4198317fec27a5d3a609ea96b6d557ff44b56c23176546023c4240", size = 155673035, upload-time = "2025-05-29T23:40:02.468Z" }, From bde95a58701456a8e913db791b721db02c54f9e9 Mon Sep 17 00:00:00 2001 From: phact Date: Tue, 7 Oct 2025 04:02:39 -0400 Subject: [PATCH 31/58] fix tests --- tests/integration/test_api_endpoints.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tests/integration/test_api_endpoints.py b/tests/integration/test_api_endpoints.py index fa36dc8b..1d325a1b 100644 --- a/tests/integration/test_api_endpoints.py +++ b/tests/integration/test_api_endpoints.py @@ -64,15 +64,16 @@ async def test_upload_and_search_endpoint(tmp_path: Path, disable_langflow_inges "src.main", "api", # Clear the api package itself "src.api", + "services", # Clear services that import clients + "src.services", + "services.search_service", + "src.services.search_service", ]: sys.modules.pop(mod, None) from src.main import create_app, startup_tasks import src.api.router as upload_router from src.config.settings import clients, INDEX_NAME, DISABLE_INGEST_WITH_LANGFLOW - # Verify settings loaded correctly - print(f"Settings DISABLE_INGEST_WITH_LANGFLOW: {DISABLE_INGEST_WITH_LANGFLOW}") - # Ensure a clean index before startup await clients.initialize() try: @@ -188,15 +189,16 @@ async def test_router_upload_ingest_traditional(tmp_path: Path, disable_langflow "src.main", "api", # Clear the api package itself "src.api", + "services", # Clear services that import clients + "src.services", + "services.search_service", + "src.services.search_service", ]: sys.modules.pop(mod, None) from src.main import create_app, startup_tasks import src.api.router as upload_router from src.config.settings import clients, INDEX_NAME, DISABLE_INGEST_WITH_LANGFLOW - # Verify settings loaded correctly - print(f"Settings DISABLE_INGEST_WITH_LANGFLOW: {DISABLE_INGEST_WITH_LANGFLOW}") - # Ensure a clean index before startup await clients.initialize() try: From 5ace89ded5eb41617422547c34d67601730c2773 Mon Sep 17 00:00:00 2001 From: phact Date: Tue, 7 Oct 2025 10:18:05 -0400 Subject: [PATCH 32/58] big runners for integration-tests --- .github/workflows/test-integration.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-integration.yml b/.github/workflows/test-integration.yml index 46bbe977..e20a5b70 100644 --- a/.github/workflows/test-integration.yml +++ b/.github/workflows/test-integration.yml @@ -8,7 +8,7 @@ on: jobs: tests: - runs-on: ubuntu-latest + runs-on: [self-hosted, linux, ARM64, langflow-ai-arm64-2] env: # Prefer repository/environment variable first, then secret, then a sane fallback OPENSEARCH_PASSWORD: ${{ vars.OPENSEARCH_PASSWORD || secrets.OPENSEARCH_PASSWORD || 'OpenRag#2025!' }} From af1163e449121ba81b09c1fb66c0bd27e75104c2 Mon Sep 17 00:00:00 2001 From: phact Date: Tue, 7 Oct 2025 10:20:36 -0400 Subject: [PATCH 33/58] remove sudo disk cleanup --- .github/workflows/test-integration.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test-integration.yml b/.github/workflows/test-integration.yml index e20a5b70..44a2abbf 100644 --- a/.github/workflows/test-integration.yml +++ b/.github/workflows/test-integration.yml @@ -16,11 +16,11 @@ jobs: steps: - run: df -h - - name: "node-cleanup" - run: | - sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /opt/hostedtoolcache/CodeQL - sudo docker image prune --all --force - sudo docker builder prune -a + #- name: "node-cleanup" + #run: | + # sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /opt/hostedtoolcache/CodeQL + # sudo docker image prune --all --force + # sudo docker builder prune -a - run: df -h - name: Checkout uses: actions/checkout@v4 From bccbcf8d12fe61fcb73ed70746a5904fdeb36ddb Mon Sep 17 00:00:00 2001 From: phact Date: Tue, 7 Oct 2025 10:36:27 -0400 Subject: [PATCH 34/58] torch extra --- .github/workflows/test-integration.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-integration.yml b/.github/workflows/test-integration.yml index 44a2abbf..e2afa334 100644 --- a/.github/workflows/test-integration.yml +++ b/.github/workflows/test-integration.yml @@ -34,7 +34,7 @@ jobs: run: uv python install 3.13 - name: Install dependencies - run: uv sync --group dev + run: uv sync --group dev --extra torch-cu128 - name: Run integration tests env: From 188aa7586680cc17c76d9b475c3de8377972ccbf Mon Sep 17 00:00:00 2001 From: phact Date: Tue, 7 Oct 2025 10:53:51 -0400 Subject: [PATCH 35/58] torch extra --- .github/workflows/test-integration.yml | 2 +- Dockerfile.backend | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-integration.yml b/.github/workflows/test-integration.yml index e2afa334..a46f911f 100644 --- a/.github/workflows/test-integration.yml +++ b/.github/workflows/test-integration.yml @@ -34,7 +34,7 @@ jobs: run: uv python install 3.13 - name: Install dependencies - run: uv sync --group dev --extra torch-cu128 + run: uv sync --group dev --extra torch - name: Run integration tests env: diff --git a/Dockerfile.backend b/Dockerfile.backend index d314eefe..5d9d84f4 100644 --- a/Dockerfile.backend +++ b/Dockerfile.backend @@ -18,7 +18,7 @@ WORKDIR /app # Copy Python dependencies COPY pyproject.toml uv.lock ./ -RUN uv sync --extra torch-cu128 +RUN uv sync # Copy sample document and warmup script for docling COPY documents/warmup_ocr.pdf ./ From ab6eb6e779f3f3a9d8904d89d3279e2dd4f73693 Mon Sep 17 00:00:00 2001 From: phact Date: Tue, 7 Oct 2025 10:55:14 -0400 Subject: [PATCH 36/58] no torch --- .github/workflows/test-integration.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-integration.yml b/.github/workflows/test-integration.yml index a46f911f..44a2abbf 100644 --- a/.github/workflows/test-integration.yml +++ b/.github/workflows/test-integration.yml @@ -34,7 +34,7 @@ jobs: run: uv python install 3.13 - name: Install dependencies - run: uv sync --group dev --extra torch + run: uv sync --group dev - name: Run integration tests env: From c6907e104ae4a0d25fd21225031dfd38b102619a Mon Sep 17 00:00:00 2001 From: phact Date: Tue, 7 Oct 2025 10:56:34 -0400 Subject: [PATCH 37/58] test without dev dependencies --- .github/workflows/test-integration.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-integration.yml b/.github/workflows/test-integration.yml index 44a2abbf..51b856b3 100644 --- a/.github/workflows/test-integration.yml +++ b/.github/workflows/test-integration.yml @@ -34,7 +34,7 @@ jobs: run: uv python install 3.13 - name: Install dependencies - run: uv sync --group dev + run: uv sync - name: Run integration tests env: From b8e8440397b87b914db0c7d6d8381ad7040c4d63 Mon Sep 17 00:00:00 2001 From: phact Date: Tue, 7 Oct 2025 11:07:12 -0400 Subject: [PATCH 38/58] fix: add router back --- frontend/src/app/admin/page.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/src/app/admin/page.tsx b/frontend/src/app/admin/page.tsx index c8c9ecf8..a318d511 100644 --- a/frontend/src/app/admin/page.tsx +++ b/frontend/src/app/admin/page.tsx @@ -51,7 +51,7 @@ function AdminPage() { const formData = new FormData() formData.append("file", selectedFile) - const response = await fetch("/api/upload", { + const response = await fetch("/api/router/upload_ingest", { method: "POST", body: formData, }) From 65d7430fac2bb4c84db7995d37d9ee9428cb82dd Mon Sep 17 00:00:00 2001 From: phact Date: Tue, 7 Oct 2025 11:08:38 -0400 Subject: [PATCH 39/58] fixes --- pyproject.toml | 20 +--- src/api/upload_utils.py | 47 -------- uv.lock | 240 +++++++++++++--------------------------- 3 files changed, 83 insertions(+), 224 deletions(-) delete mode 100644 src/api/upload_utils.py diff --git a/pyproject.toml b/pyproject.toml index bc8cb811..cbdd7be4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,6 +19,7 @@ dependencies = [ "pyjwt>=2.8.0", "python-multipart>=0.0.20", "starlette>=0.47.1", + "torch>=2.7.1", "uvicorn>=0.35.0", "boto3>=1.35.0", "psutil>=7.0.0", @@ -30,10 +31,6 @@ dependencies = [ "docling-serve>=1.4.1", ] -[project.optional-dependencies] -torch = ["torch", "torchvision"] -torch-cu128 = ["torch", "torchvision"] - [dependency-groups] dev = ["pytest>=8", "pytest-asyncio>=0.21.0", "pytest-mock>=3.12.0", "pytest-cov>=4.0.0"] @@ -46,20 +43,13 @@ package = true [tool.uv.sources] torch = [ - { index = "pytorch-cu128", extra = "torch-cu128", marker = "sys_platform == 'linux'" }, - { index = "pytorch-cpu", extra = "torch", marker = "sys_platform != 'linux'" } + { index = "pytorch-cu128", marker = "sys_platform == 'linux' and platform_machine == 'x86_64'" }, ] torchvision = [ - { index = "pytorch-cu128", extra = "torch-cu128", marker = "sys_platform == 'linux'" }, - { index = "pytorch-cpu", extra = "torch", marker = "sys_platform != 'linux'" } + { index = "pytorch-cu128", marker = "sys_platform == 'linux' and platform_machine == 'x86_64'" }, ] [[tool.uv.index]] -name = "pytorch-cu128" -url = "https://download.pytorch.org/whl/cu128" -explicit = true - -[[tool.uv.index]] -name = "pytorch-cpu" -url = "https://download.pytorch.org/whl/cpu" +name = "pytorch-cu128" +url = "https://download.pytorch.org/whl/cu128" explicit = true diff --git a/src/api/upload_utils.py b/src/api/upload_utils.py deleted file mode 100644 index f2479107..00000000 --- a/src/api/upload_utils.py +++ /dev/null @@ -1,47 +0,0 @@ -from typing import List - -from starlette.requests import Request - - -async def extract_user_context(request: Request) -> dict: - """Extract user/auth context from request.state. Honors no-auth mode.""" - from config.settings import is_no_auth_mode - - user = getattr(request.state, "user", None) - jwt_token = getattr(request.state, "jwt_token", None) - - if is_no_auth_mode(): - return { - "owner_user_id": None, - "owner_name": None, - "owner_email": None, - "jwt_token": None, - } - - return { - "owner_user_id": getattr(user, "user_id", None), - "owner_name": getattr(user, "name", None), - "owner_email": getattr(user, "email", None), - "jwt_token": jwt_token, - } - - -async def create_temp_files_from_form_files(upload_files: List) -> list[str]: - """Persist UploadFile items to temp files; return list of paths.""" - import tempfile - import os - - temp_file_paths: list[str] = [] - for upload_file in upload_files: - content = await upload_file.read() - safe_filename = ( - upload_file.filename.replace(" ", "_").replace("/", "_") - if getattr(upload_file, "filename", None) - else "uploaded" - ) - fd, temp_path = tempfile.mkstemp(suffix=f"_{safe_filename}") - with os.fdopen(fd, "wb") as temp_file: - temp_file.write(content) - temp_file_paths.append(temp_path) - return temp_file_paths - diff --git a/uv.lock b/uv.lock index fd5164cb..8b795659 100644 --- a/uv.lock +++ b/uv.lock @@ -2,11 +2,10 @@ version = 1 revision = 2 requires-python = ">=3.13" resolution-markers = [ - "sys_platform == 'darwin'", - "platform_machine == 'aarch64' and sys_platform == 'linux'", "platform_machine == 'x86_64' and sys_platform == 'linux'", - "sys_platform != 'darwin' and sys_platform != 'linux'", - "platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux'", + "platform_machine == 'aarch64' and sys_platform == 'linux'", + "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')", + "sys_platform == 'darwin'", ] [[package]] @@ -21,9 +20,8 @@ dependencies = [ { name = "psutil" }, { name = "pyyaml" }, { name = "safetensors" }, - { name = "torch", version = "2.7.1+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform == 'linux'" }, - { name = "torch", version = "2.8.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" }, - { name = "torch", version = "2.8.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, + { name = "torch", version = "2.7.1+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "torch", version = "2.8.0", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine != 'x86_64' or sys_platform != 'linux'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/b1/72/ff3961c19ee395c3d30ac630ee77bfb0e1b46b87edc504d4f83bb4a89705/accelerate-1.10.1.tar.gz", hash = "sha256:3dea89e433420e4bfac0369cae7e36dcd6a56adfcfd38cdda145c6225eab5df8", size = 392446, upload-time = "2025-08-25T13:57:06.21Z" } wheels = [ @@ -293,10 +291,9 @@ name = "click" version = "8.2.1" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "platform_machine == 'aarch64' and sys_platform == 'linux'", "platform_machine == 'x86_64' and sys_platform == 'linux'", - "sys_platform != 'darwin' and sys_platform != 'linux'", - "platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux'", + "platform_machine == 'aarch64' and sys_platform == 'linux'", + "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')", ] dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, @@ -518,10 +515,9 @@ name = "dill" version = "0.4.0" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "platform_machine == 'aarch64' and sys_platform == 'linux'", "platform_machine == 'x86_64' and sys_platform == 'linux'", - "sys_platform != 'darwin' and sys_platform != 'linux'", - "platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux'", + "platform_machine == 'aarch64' and sys_platform == 'linux'", + "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')", ] sdist = { url = "https://files.pythonhosted.org/packages/12/80/630b4b88364e9a8c8c5797f4602d0f76ef820909ee32f0bacb9f90654042/dill-0.4.0.tar.gz", hash = "sha256:0633f1d2df477324f53a895b02c901fb961bdbf65a17122586ea7019292cbcf0", size = 186976, upload-time = "2025-04-16T00:41:48.867Z" } wheels = [ @@ -635,13 +631,10 @@ dependencies = [ { name = "pydantic" }, { name = "rtree" }, { name = "safetensors", extra = ["torch"] }, - { name = "torch", version = "2.7.1+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform == 'linux'" }, - { name = "torch", version = "2.8.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" }, - { name = "torch", version = "2.8.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, - { name = "torchvision", version = "0.22.1", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, - { name = "torchvision", version = "0.22.1+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "platform_machine != 'aarch64' and sys_platform == 'linux'" }, - { name = "torchvision", version = "0.23.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" }, - { name = "torchvision", version = "0.23.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, + { name = "torch", version = "2.7.1+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "torch", version = "2.8.0", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine != 'x86_64' or sys_platform != 'linux'" }, + { name = "torchvision", version = "0.22.1", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "torchvision", version = "0.23.0", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine != 'x86_64' or sys_platform != 'linux'" }, { name = "tqdm" }, { name = "transformers" }, ] @@ -687,10 +680,9 @@ name = "docling-mcp" version = "1.1.0" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "platform_machine == 'aarch64' and sys_platform == 'linux'", "platform_machine == 'x86_64' and sys_platform == 'linux'", - "sys_platform != 'darwin' and sys_platform != 'linux'", - "platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux'", + "platform_machine == 'aarch64' and sys_platform == 'linux'", + "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')", ] dependencies = [ { name = "docling", marker = "sys_platform != 'darwin'" }, @@ -795,13 +787,10 @@ dependencies = [ { name = "scikit-image" }, { name = "scipy" }, { name = "shapely" }, - { name = "torch", version = "2.7.1+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform == 'linux'" }, - { name = "torch", version = "2.8.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" }, - { name = "torch", version = "2.8.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, - { name = "torchvision", version = "0.22.1", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, - { name = "torchvision", version = "0.22.1+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "platform_machine != 'aarch64' and sys_platform == 'linux'" }, - { name = "torchvision", version = "0.23.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" }, - { name = "torchvision", version = "0.23.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, + { name = "torch", version = "2.7.1+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "torch", version = "2.8.0", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine != 'x86_64' or sys_platform != 'linux'" }, + { name = "torchvision", version = "0.22.1", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "torchvision", version = "0.23.0", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine != 'x86_64' or sys_platform != 'linux'" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/bb/84/4a2cab0e6adde6a85e7ba543862e5fc0250c51f3ac721a078a55cdcff250/easyocr-1.7.2-py3-none-any.whl", hash = "sha256:5be12f9b0e595d443c9c3d10b0542074b50f0ec2d98b141a109cd961fd1c177c", size = 2870178, upload-time = "2024-09-24T11:34:43.554Z" }, @@ -1015,10 +1004,9 @@ name = "fsspec" version = "2025.5.1" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "platform_machine == 'aarch64' and sys_platform == 'linux'", "platform_machine == 'x86_64' and sys_platform == 'linux'", - "sys_platform != 'darwin' and sys_platform != 'linux'", - "platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux'", + "platform_machine == 'aarch64' and sys_platform == 'linux'", + "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')", ] sdist = { url = "https://files.pythonhosted.org/packages/00/f7/27f15d41f0ed38e8fcc488584b57e902b331da7f7c6dcda53721b15838fc/fsspec-2025.5.1.tar.gz", hash = "sha256:2e55e47a540b91843b755e83ded97c6e897fa0942b11490113f09e9c443c2475", size = 303033, upload-time = "2025-05-24T12:03:23.792Z" } wheels = [ @@ -1337,10 +1325,9 @@ name = "huggingface-hub" version = "0.33.2" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "platform_machine == 'aarch64' and sys_platform == 'linux'", "platform_machine == 'x86_64' and sys_platform == 'linux'", - "sys_platform != 'darwin' and sys_platform != 'linux'", - "platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux'", + "platform_machine == 'aarch64' and sys_platform == 'linux'", + "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')", ] dependencies = [ { name = "filelock", marker = "sys_platform != 'darwin'" }, @@ -2043,10 +2030,9 @@ name = "multiprocess" version = "0.70.18" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "platform_machine == 'aarch64' and sys_platform == 'linux'", "platform_machine == 'x86_64' and sys_platform == 'linux'", - "sys_platform != 'darwin' and sys_platform != 'linux'", - "platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux'", + "platform_machine == 'aarch64' and sys_platform == 'linux'", + "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')", ] dependencies = [ { name = "dill", version = "0.4.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'darwin'" }, @@ -2391,29 +2377,11 @@ dependencies = [ { name = "structlog" }, { name = "textual" }, { name = "textual-fspicker" }, + { name = "torch", version = "2.7.1+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "torch", version = "2.8.0", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine != 'x86_64' or sys_platform != 'linux'" }, { name = "uvicorn" }, ] -[package.optional-dependencies] -torch = [ - { name = "torch", version = "2.7.1+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform == 'linux'" }, - { name = "torch", version = "2.8.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" }, - { name = "torch", version = "2.8.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, - { name = "torchvision", version = "0.22.1", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, - { name = "torchvision", version = "0.22.1+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "platform_machine != 'aarch64' and sys_platform == 'linux'" }, - { name = "torchvision", version = "0.23.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" }, - { name = "torchvision", version = "0.23.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, -] -torch-cu128 = [ - { name = "torch", version = "2.7.1+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform == 'linux'" }, - { name = "torch", version = "2.8.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" }, - { name = "torch", version = "2.8.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, - { name = "torchvision", version = "0.22.1", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, - { name = "torchvision", version = "0.22.1+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "platform_machine != 'aarch64' and sys_platform == 'linux'" }, - { name = "torchvision", version = "0.23.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" }, - { name = "torchvision", version = "0.23.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, -] - [package.dev-dependencies] dev = [ { name = "pytest" }, @@ -2446,17 +2414,10 @@ requires-dist = [ { name = "structlog", specifier = ">=25.4.0" }, { name = "textual", specifier = ">=0.45.0" }, { name = "textual-fspicker", specifier = ">=0.6.0" }, - { name = "torch", marker = "sys_platform == 'linux' and extra == 'torch'" }, - { name = "torch", marker = "sys_platform == 'linux' and extra == 'torch-cu128'", index = "https://download.pytorch.org/whl/cu128", conflict = { package = "openrag", extra = "torch-cu128" } }, - { name = "torch", marker = "sys_platform != 'linux' and extra == 'torch'", index = "https://download.pytorch.org/whl/cpu", conflict = { package = "openrag", extra = "torch" } }, - { name = "torch", marker = "sys_platform != 'linux' and extra == 'torch-cu128'" }, - { name = "torchvision", marker = "sys_platform == 'linux' and extra == 'torch'" }, - { name = "torchvision", marker = "sys_platform == 'linux' and extra == 'torch-cu128'", index = "https://download.pytorch.org/whl/cu128", conflict = { package = "openrag", extra = "torch-cu128" } }, - { name = "torchvision", marker = "sys_platform != 'linux' and extra == 'torch'", index = "https://download.pytorch.org/whl/cpu", conflict = { package = "openrag", extra = "torch" } }, - { name = "torchvision", marker = "sys_platform != 'linux' and extra == 'torch-cu128'" }, + { name = "torch", marker = "platform_machine != 'x86_64' or sys_platform != 'linux'", specifier = ">=2.7.1" }, + { name = "torch", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'", specifier = ">=2.7.1", index = "https://download.pytorch.org/whl/cu128" }, { name = "uvicorn", specifier = ">=0.35.0" }, ] -provides-extras = ["torch", "torch-cu128"] [package.metadata.requires-dev] dev = [ @@ -3440,9 +3401,8 @@ wheels = [ [package.optional-dependencies] torch = [ { name = "numpy" }, - { name = "torch", version = "2.7.1+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform == 'linux'" }, - { name = "torch", version = "2.8.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" }, - { name = "torch", version = "2.8.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, + { name = "torch", version = "2.7.1+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "torch", version = "2.8.0", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine != 'x86_64' or sys_platform != 'linux'" }, ] [[package]] @@ -3766,15 +3726,13 @@ name = "torch" version = "2.7.1+cu128" source = { registry = "https://download.pytorch.org/whl/cu128" } resolution-markers = [ - "platform_machine == 'aarch64' and sys_platform == 'linux'", "platform_machine == 'x86_64' and sys_platform == 'linux'", - "platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux'", ] dependencies = [ - { name = "filelock", marker = "sys_platform == 'linux'" }, - { name = "fsspec", version = "2025.5.1", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux'" }, - { name = "jinja2", marker = "sys_platform == 'linux'" }, - { name = "networkx", marker = "sys_platform == 'linux'" }, + { name = "filelock", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "fsspec", version = "2025.5.1", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "jinja2", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "networkx", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, @@ -3789,128 +3747,86 @@ dependencies = [ { name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "nvidia-nvjitlink-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "setuptools", marker = "sys_platform == 'linux'" }, - { name = "sympy", marker = "sys_platform == 'linux'" }, - { name = "triton", marker = "sys_platform == 'linux'" }, - { name = "typing-extensions", marker = "sys_platform == 'linux'" }, + { name = "setuptools", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "sympy", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "triton", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "typing-extensions", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, ] wheels = [ - { url = "https://download.pytorch.org/whl/cu128/torch-2.7.1%2Bcu128-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:d56d29a6ad7758ba5173cc2b0c51c93e126e2b0a918e874101dc66545283967f" }, { url = "https://download.pytorch.org/whl/cu128/torch-2.7.1%2Bcu128-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:9560425f9ea1af1791507e8ca70d5b9ecf62fed7ca226a95fcd58d0eb2cca78f" }, - { url = "https://download.pytorch.org/whl/cu128/torch-2.7.1%2Bcu128-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:f112465fdf42eb1297c6dddda1a8b7f411914428b704e1b8a47870c52e290909" }, { url = "https://download.pytorch.org/whl/cu128/torch-2.7.1%2Bcu128-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:c355db49c218ada70321d5c5c9bb3077312738b99113c8f3723ef596b554a7b9" }, ] [[package]] name = "torch" version = "2.8.0" -source = { registry = "https://download.pytorch.org/whl/cpu" } +source = { registry = "https://pypi.org/simple" } resolution-markers = [ + "platform_machine == 'aarch64' and sys_platform == 'linux'", + "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')", "sys_platform == 'darwin'", ] dependencies = [ - { name = "filelock", marker = "sys_platform == 'darwin'" }, + { name = "filelock", marker = "platform_machine != 'x86_64' or sys_platform != 'linux'" }, { name = "fsspec", version = "2025.3.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" }, - { name = "jinja2", marker = "sys_platform == 'darwin'" }, - { name = "networkx", marker = "sys_platform == 'darwin'" }, - { name = "setuptools", marker = "sys_platform == 'darwin'" }, - { name = "sympy", marker = "sys_platform == 'darwin'" }, - { name = "typing-extensions", marker = "sys_platform == 'darwin'" }, + { name = "fsspec", version = "2025.5.1", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, + { name = "jinja2", marker = "platform_machine != 'x86_64' or sys_platform != 'linux'" }, + { name = "networkx", marker = "platform_machine != 'x86_64' or sys_platform != 'linux'" }, + { name = "setuptools", marker = "platform_machine != 'x86_64' or sys_platform != 'linux'" }, + { name = "sympy", marker = "platform_machine != 'x86_64' or sys_platform != 'linux'" }, + { name = "typing-extensions", marker = "platform_machine != 'x86_64' or sys_platform != 'linux'" }, ] wheels = [ - { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:fbe2e149c5174ef90d29a5f84a554dfaf28e003cb4f61fa2c8c024c17ec7ca58" }, - { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:057efd30a6778d2ee5e2374cd63a63f63311aa6f33321e627c655df60abdd390" }, -] - -[[package]] -name = "torch" -version = "2.8.0+cpu" -source = { registry = "https://download.pytorch.org/whl/cpu" } -resolution-markers = [ - "sys_platform != 'darwin' and sys_platform != 'linux'", -] -dependencies = [ - { name = "filelock", marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, - { name = "fsspec", version = "2025.5.1", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, - { name = "jinja2", marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, - { name = "networkx", marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, - { name = "setuptools", marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, - { name = "sympy", marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, - { name = "typing-extensions", marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, -] -wheels = [ - { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313-win_amd64.whl", hash = "sha256:e1ee1b2346ade3ea90306dfbec7e8ff17bc220d344109d189ae09078333b0856" }, - { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313-win_arm64.whl", hash = "sha256:64c187345509f2b1bb334feed4666e2c781ca381874bde589182f81247e61f88" }, - { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313t-win_amd64.whl", hash = "sha256:6d93a7165419bc4b2b907e859ccab0dea5deeab261448ae9a5ec5431f14c0e64" }, + { url = "https://files.pythonhosted.org/packages/10/4e/469ced5a0603245d6a19a556e9053300033f9c5baccf43a3d25ba73e189e/torch-2.8.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:2b2f96814e0345f5a5aed9bf9734efa913678ed19caf6dc2cddb7930672d6128", size = 101936856, upload-time = "2025-08-06T14:54:01.526Z" }, + { url = "https://files.pythonhosted.org/packages/16/82/3948e54c01b2109238357c6f86242e6ecbf0c63a1af46906772902f82057/torch-2.8.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:65616ca8ec6f43245e1f5f296603e33923f4c30f93d65e103d9e50c25b35150b", size = 887922844, upload-time = "2025-08-06T14:55:50.78Z" }, + { url = "https://files.pythonhosted.org/packages/e3/54/941ea0a860f2717d86a811adf0c2cd01b3983bdd460d0803053c4e0b8649/torch-2.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:659df54119ae03e83a800addc125856effda88b016dfc54d9f65215c3975be16", size = 241330968, upload-time = "2025-08-06T14:54:45.293Z" }, + { url = "https://files.pythonhosted.org/packages/de/69/8b7b13bba430f5e21d77708b616f767683629fc4f8037564a177d20f90ed/torch-2.8.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:1a62a1ec4b0498930e2543535cf70b1bef8c777713de7ceb84cd79115f553767", size = 73915128, upload-time = "2025-08-06T14:54:34.769Z" }, + { url = "https://files.pythonhosted.org/packages/15/0e/8a800e093b7f7430dbaefa80075aee9158ec22e4c4fc3c1a66e4fb96cb4f/torch-2.8.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:83c13411a26fac3d101fe8035a6b0476ae606deb8688e904e796a3534c197def", size = 102020139, upload-time = "2025-08-06T14:54:39.047Z" }, + { url = "https://files.pythonhosted.org/packages/4a/15/5e488ca0bc6162c86a33b58642bc577c84ded17c7b72d97e49b5833e2d73/torch-2.8.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:8f0a9d617a66509ded240add3754e462430a6c1fc5589f86c17b433dd808f97a", size = 887990692, upload-time = "2025-08-06T14:56:18.286Z" }, + { url = "https://files.pythonhosted.org/packages/b4/a8/6a04e4b54472fc5dba7ca2341ab219e529f3c07b6941059fbf18dccac31f/torch-2.8.0-cp313-cp313t-win_amd64.whl", hash = "sha256:a7242b86f42be98ac674b88a4988643b9bc6145437ec8f048fea23f72feb5eca", size = 241603453, upload-time = "2025-08-06T14:55:22.945Z" }, + { url = "https://files.pythonhosted.org/packages/04/6e/650bb7f28f771af0cb791b02348db8b7f5f64f40f6829ee82aa6ce99aabe/torch-2.8.0-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:7b677e17f5a3e69fdef7eb3b9da72622f8d322692930297e4ccb52fefc6c8211", size = 73632395, upload-time = "2025-08-06T14:55:28.645Z" }, ] [[package]] name = "torchvision" version = "0.22.1" -source = { registry = "https://download.pytorch.org/whl/cu128" } -resolution-markers = [ - "platform_machine == 'aarch64' and sys_platform == 'linux'", -] -dependencies = [ - { name = "numpy", marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, - { name = "pillow", marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, - { name = "torch", version = "2.7.1+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, -] -wheels = [ - { url = "https://download.pytorch.org/whl/cu128/torchvision-0.22.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:75f519ebe412ced95d727c71c30c68084cc6fd36347b88f338e88ff9d07a3ac8" }, - { url = "https://download.pytorch.org/whl/cu128/torchvision-0.22.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:f6565fd22e04e51f9600f34a3a20b120ee9f5a73161bfcb79c826225054aa44e" }, -] - -[[package]] -name = "torchvision" -version = "0.22.1+cu128" -source = { registry = "https://download.pytorch.org/whl/cu128" } +source = { registry = "https://pypi.org/simple" } resolution-markers = [ "platform_machine == 'x86_64' and sys_platform == 'linux'", - "platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux'", ] dependencies = [ - { name = "numpy", marker = "platform_machine != 'aarch64' and sys_platform == 'linux'" }, - { name = "pillow", marker = "platform_machine != 'aarch64' and sys_platform == 'linux'" }, - { name = "torch", version = "2.7.1+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "platform_machine != 'aarch64' and sys_platform == 'linux'" }, + { name = "numpy", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "pillow", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "torch", version = "2.7.1+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, ] wheels = [ - { url = "https://download.pytorch.org/whl/cu128/torchvision-0.22.1%2Bcu128-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:bc4fef193917b51db6b409acd3ffdec9286d877baac0aee5dcfbb72592d00bfc" }, - { url = "https://download.pytorch.org/whl/cu128/torchvision-0.22.1%2Bcu128-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:02faf51fbf5070592768fa935327d13a484b745faef38b0fee01d85cfb35f5bc" }, + { url = "https://files.pythonhosted.org/packages/8d/b0/3cffd6a285b5ffee3fe4a31caff49e350c98c5963854474d1c4f7a51dea5/torchvision-0.22.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:7ee682be589bb1a002b7704f06b8ec0b89e4b9068f48e79307d2c6e937a9fdf4", size = 7485894, upload-time = "2025-06-04T17:43:01.371Z" }, + { url = "https://files.pythonhosted.org/packages/94/8b/04c6b15f8c29b39f0679589753091cec8b192ab296d4fdaf9055544c4ec9/torchvision-0.22.1-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:ef46e065502f7300ad6abc98554131c35dc4c837b978d91306658f1a65c00baa", size = 7658543, upload-time = "2025-06-04T17:42:46.064Z" }, ] [[package]] name = "torchvision" version = "0.23.0" -source = { registry = "https://download.pytorch.org/whl/cpu" } +source = { registry = "https://pypi.org/simple" } resolution-markers = [ + "platform_machine == 'aarch64' and sys_platform == 'linux'", + "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')", "sys_platform == 'darwin'", ] dependencies = [ - { name = "numpy", marker = "sys_platform == 'darwin'" }, - { name = "pillow", marker = "sys_platform == 'darwin'" }, - { name = "torch", version = "2.8.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" }, + { name = "numpy", marker = "platform_machine != 'x86_64' or sys_platform != 'linux'" }, + { name = "pillow", marker = "platform_machine != 'x86_64' or sys_platform != 'linux'" }, + { name = "torch", version = "2.8.0", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine != 'x86_64' or sys_platform != 'linux'" }, ] wheels = [ - { url = "https://download.pytorch.org/whl/cpu/torchvision-0.23.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1c37e325e09a184b730c3ef51424f383ec5745378dc0eca244520aca29722600" }, - { url = "https://download.pytorch.org/whl/cpu/torchvision-0.23.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:2df618e1143805a7673aaf82cb5720dd9112d4e771983156aaf2ffff692eebf9" }, -] - -[[package]] -name = "torchvision" -version = "0.23.0+cpu" -source = { registry = "https://download.pytorch.org/whl/cpu" } -resolution-markers = [ - "sys_platform != 'darwin' and sys_platform != 'linux'", -] -dependencies = [ - { name = "numpy", marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, - { name = "pillow", marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, - { name = "torch", version = "2.8.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, -] -wheels = [ - { url = "https://download.pytorch.org/whl/cpu/torchvision-0.23.0%2Bcpu-cp313-cp313-win_amd64.whl", hash = "sha256:82928788025170c62e7df1120dcdc0cd175bfc31c08374613ce6d1a040bc0cda" }, - { url = "https://download.pytorch.org/whl/cpu/torchvision-0.23.0%2Bcpu-cp313-cp313t-win_amd64.whl", hash = "sha256:8d6a47e23d7896f0ef9aa7ea7179eb6324e82438aa66d19884c2020d0646b104" }, + { url = "https://files.pythonhosted.org/packages/91/37/45a5b9407a7900f71d61b2b2f62db4b7c632debca397f205fdcacb502780/torchvision-0.23.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1c37e325e09a184b730c3ef51424f383ec5745378dc0eca244520aca29722600", size = 1856886, upload-time = "2025-08-06T14:58:05.491Z" }, + { url = "https://files.pythonhosted.org/packages/ac/da/a06c60fc84fc849377cf035d3b3e9a1c896d52dbad493b963c0f1cdd74d0/torchvision-0.23.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:2f7fd6c15f3697e80627b77934f77705f3bc0e98278b989b2655de01f6903e1d", size = 2353112, upload-time = "2025-08-06T14:58:26.265Z" }, + { url = "https://files.pythonhosted.org/packages/a0/27/5ce65ba5c9d3b7d2ccdd79892ab86a2f87ac2ca6638f04bb0280321f1a9c/torchvision-0.23.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:a76fafe113b2977be3a21bf78f115438c1f88631d7a87203acb3dd6ae55889e6", size = 8627658, upload-time = "2025-08-06T14:58:15.999Z" }, + { url = "https://files.pythonhosted.org/packages/1f/e4/028a27b60aa578a2fa99d9d7334ff1871bb17008693ea055a2fdee96da0d/torchvision-0.23.0-cp313-cp313-win_amd64.whl", hash = "sha256:07d069cb29691ff566e3b7f11f20d91044f079e1dbdc9d72e0655899a9b06938", size = 1600749, upload-time = "2025-08-06T14:58:10.719Z" }, + { url = "https://files.pythonhosted.org/packages/05/35/72f91ad9ac7c19a849dedf083d347dc1123f0adeb401f53974f84f1d04c8/torchvision-0.23.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:2df618e1143805a7673aaf82cb5720dd9112d4e771983156aaf2ffff692eebf9", size = 2047192, upload-time = "2025-08-06T14:58:11.813Z" }, + { url = "https://files.pythonhosted.org/packages/1d/9d/406cea60a9eb9882145bcd62a184ee61e823e8e1d550cdc3c3ea866a9445/torchvision-0.23.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:2a3299d2b1d5a7aed2d3b6ffb69c672ca8830671967eb1cee1497bacd82fe47b", size = 2359295, upload-time = "2025-08-06T14:58:17.469Z" }, + { url = "https://files.pythonhosted.org/packages/2b/f4/34662f71a70fa1e59de99772142f22257ca750de05ccb400b8d2e3809c1d/torchvision-0.23.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:76bc4c0b63d5114aa81281390f8472a12a6a35ce9906e67ea6044e5af4cab60c", size = 8800474, upload-time = "2025-08-06T14:58:22.53Z" }, + { url = "https://files.pythonhosted.org/packages/6e/f5/b5a2d841a8d228b5dbda6d524704408e19e7ca6b7bb0f24490e081da1fa1/torchvision-0.23.0-cp313-cp313t-win_amd64.whl", hash = "sha256:b9e2dabf0da9c8aa9ea241afb63a8f3e98489e706b22ac3f30416a1be377153b", size = 1527667, upload-time = "2025-08-06T14:58:14.446Z" }, ] [[package]] @@ -3952,7 +3868,7 @@ name = "triton" version = "3.3.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "setuptools", marker = "sys_platform == 'linux'" }, + { name = "setuptools", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/74/1f/dfb531f90a2d367d914adfee771babbd3f1a5b26c3f5fbc458dee21daa78/triton-3.3.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b89d846b5a4198317fec27a5d3a609ea96b6d557ff44b56c23176546023c4240", size = 155673035, upload-time = "2025-05-29T23:40:02.468Z" }, From a669c19391a1335eb9664d3cc49000170c8408c3 Mon Sep 17 00:00:00 2001 From: phact Date: Tue, 7 Oct 2025 12:54:24 -0400 Subject: [PATCH 40/58] bigger runner disk --- .github/workflows/test-integration.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-integration.yml b/.github/workflows/test-integration.yml index 51b856b3..16f33c41 100644 --- a/.github/workflows/test-integration.yml +++ b/.github/workflows/test-integration.yml @@ -8,7 +8,7 @@ on: jobs: tests: - runs-on: [self-hosted, linux, ARM64, langflow-ai-arm64-2] + runs-on: [self-hosted, linux, ARM64, langflow-ai-arm64-40gb] env: # Prefer repository/environment variable first, then secret, then a sane fallback OPENSEARCH_PASSWORD: ${{ vars.OPENSEARCH_PASSWORD || secrets.OPENSEARCH_PASSWORD || 'OpenRag#2025!' }} From 325358e917cff75d3366ec6dafd7ef9ca4942c17 Mon Sep 17 00:00:00 2001 From: phact Date: Tue, 7 Oct 2025 13:18:31 -0400 Subject: [PATCH 41/58] key permissions --- src/main.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/main.py b/src/main.py index 732eee1f..1094f8b5 100644 --- a/src/main.py +++ b/src/main.py @@ -242,6 +242,9 @@ def generate_jwt_keys(): capture_output=True, ) + # Set restrictive permissions on private key (readable by owner only) + os.chmod(private_key_path, 0o600) + # Generate public key subprocess.run( [ @@ -257,12 +260,21 @@ def generate_jwt_keys(): capture_output=True, ) + # Set permissions on public key (readable by all) + os.chmod(public_key_path, 0o644) + logger.info("Generated RSA keys for JWT signing") except subprocess.CalledProcessError as e: logger.error("Failed to generate RSA keys", error=str(e)) raise else: - logger.info("RSA keys already exist, skipping generation") + # Ensure correct permissions on existing keys + try: + os.chmod(private_key_path, 0o600) + os.chmod(public_key_path, 0o644) + logger.info("RSA keys already exist, ensured correct permissions") + except OSError as e: + logger.warning("Failed to set permissions on existing keys", error=str(e)) async def init_index_when_ready(): From 6a68a46ef3257697d2f85e9fff77e3ddf6a3362f Mon Sep 17 00:00:00 2001 From: phact Date: Tue, 7 Oct 2025 13:40:16 -0400 Subject: [PATCH 42/58] makefile: test-ci, create keys if needed --- Makefile | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/Makefile b/Makefile index 47d61705..c4f85c68 100644 --- a/Makefile +++ b/Makefile @@ -194,11 +194,23 @@ test-ci: @set -e; \ echo "Installing test dependencies..."; \ uv sync --group dev; \ + if [ ! -f keys/private_key.pem ]; then \ + echo "Generating RSA keys for JWT signing..."; \ + uv run python -c "from src.main import generate_jwt_keys; generate_jwt_keys()"; \ + else \ + echo "RSA keys already exist, ensuring correct permissions..."; \ + chmod 600 keys/private_key.pem 2>/dev/null || true; \ + chmod 644 keys/public_key.pem 2>/dev/null || true; \ + fi; \ echo "Starting infra (OpenSearch + Dashboards + Langflow) with CPU containers"; \ docker compose -f docker-compose-cpu.yml up -d opensearch dashboards langflow; \ echo "Starting docling-serve..."; \ DOCLING_ENDPOINT=$$(uv run python scripts/docling_ctl.py start --port 5001 | grep "Endpoint:" | awk '{print $$2}'); \ echo "Docling-serve started at $$DOCLING_ENDPOINT"; \ + echo "Waiting for backend OIDC endpoint..."; \ + for i in $$(seq 1 60); do \ + curl -s http://localhost:8000/.well-known/openid-configuration >/dev/null 2>&1 && break || sleep 2; \ + done; \ echo "Waiting for OpenSearch..."; \ for i in $$(seq 1 60); do \ curl -k -s https://localhost:9200 -u admin:$${OPENSEARCH_PASSWORD} >/dev/null 2>&1 && break || sleep 2; \ From d0fb8af3c8cc29e026090fc3868a9a77195d50bc Mon Sep 17 00:00:00 2001 From: phact Date: Tue, 7 Oct 2025 13:49:06 -0400 Subject: [PATCH 43/58] poll to ensure securityconfig is applied --- Makefile | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index c4f85c68..48c11486 100644 --- a/Makefile +++ b/Makefile @@ -211,9 +211,18 @@ test-ci: for i in $$(seq 1 60); do \ curl -s http://localhost:8000/.well-known/openid-configuration >/dev/null 2>&1 && break || sleep 2; \ done; \ - echo "Waiting for OpenSearch..."; \ + echo "Waiting for OpenSearch with JWT auth to work..."; \ for i in $$(seq 1 60); do \ - curl -k -s https://localhost:9200 -u admin:$${OPENSEARCH_PASSWORD} >/dev/null 2>&1 && break || sleep 2; \ + if curl -k -s https://localhost:9200 -u admin:$${OPENSEARCH_PASSWORD} >/dev/null 2>&1; then \ + TOKEN=$$(curl -s http://localhost:8000/auth/me | grep -o '"token":"[^"]*"' | cut -d'"' -f4 || echo ""); \ + if [ -n "$$TOKEN" ]; then \ + if curl -k -s -H "Authorization: Bearer $$TOKEN" https://localhost:9200/documents/_search -d '{"query":{"match_all":{}}}' 2>&1 | grep -v "Unauthorized" >/dev/null; then \ + echo "OpenSearch JWT auth working"; \ + break; \ + fi; \ + fi; \ + fi; \ + sleep 2; \ done; \ echo "Waiting for Langflow..."; \ for i in $$(seq 1 60); do \ From fb64c3e966f8b0612a2876e2ae5676697e73ccf7 Mon Sep 17 00:00:00 2001 From: phact Date: Tue, 7 Oct 2025 14:01:38 -0400 Subject: [PATCH 44/58] os security check --- Makefile | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 48c11486..4712fac2 100644 --- a/Makefile +++ b/Makefile @@ -212,18 +212,26 @@ test-ci: curl -s http://localhost:8000/.well-known/openid-configuration >/dev/null 2>&1 && break || sleep 2; \ done; \ echo "Waiting for OpenSearch with JWT auth to work..."; \ + JWT_AUTH_READY=false; \ for i in $$(seq 1 60); do \ if curl -k -s https://localhost:9200 -u admin:$${OPENSEARCH_PASSWORD} >/dev/null 2>&1; then \ TOKEN=$$(curl -s http://localhost:8000/auth/me | grep -o '"token":"[^"]*"' | cut -d'"' -f4 || echo ""); \ if [ -n "$$TOKEN" ]; then \ if curl -k -s -H "Authorization: Bearer $$TOKEN" https://localhost:9200/documents/_search -d '{"query":{"match_all":{}}}' 2>&1 | grep -v "Unauthorized" >/dev/null; then \ - echo "OpenSearch JWT auth working"; \ + echo "โœ“ OpenSearch JWT auth working after $$((i*2)) seconds"; \ + JWT_AUTH_READY=true; \ break; \ fi; \ fi; \ fi; \ sleep 2; \ done; \ + if [ "$$JWT_AUTH_READY" = "false" ]; then \ + echo "โœ— ERROR: OpenSearch JWT authentication failed to work after 120 seconds!"; \ + echo " This likely means the OIDC security configuration was not applied correctly."; \ + echo " Check OpenSearch logs: docker logs os"; \ + exit 1; \ + fi; \ echo "Waiting for Langflow..."; \ for i in $$(seq 1 60); do \ curl -s http://localhost:7860/ >/dev/null 2>&1 && break || sleep 2; \ From fcfb07fee2a712765f9c38d0d2698367374f8a7d Mon Sep 17 00:00:00 2001 From: phact Date: Tue, 7 Oct 2025 14:09:20 -0400 Subject: [PATCH 45/58] dump logs --- Makefile | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 4712fac2..e7a29d2f 100644 --- a/Makefile +++ b/Makefile @@ -227,9 +227,21 @@ test-ci: sleep 2; \ done; \ if [ "$$JWT_AUTH_READY" = "false" ]; then \ + echo ""; \ + echo "========================================================================"; \ echo "โœ— ERROR: OpenSearch JWT authentication failed to work after 120 seconds!"; \ - echo " This likely means the OIDC security configuration was not applied correctly."; \ - echo " Check OpenSearch logs: docker logs os"; \ + echo "========================================================================"; \ + echo ""; \ + echo "Dumping OpenSearch container logs:"; \ + echo "------------------------------------------------------------------------"; \ + docker logs os --tail 100; \ + echo "------------------------------------------------------------------------"; \ + echo ""; \ + echo "Dumping backend container logs:"; \ + echo "------------------------------------------------------------------------"; \ + docker logs openrag-backend --tail 50; \ + echo "------------------------------------------------------------------------"; \ + echo ""; \ exit 1; \ fi; \ echo "Waiting for Langflow..."; \ From a63530e0226c2c44361921a33f84848fa666341e Mon Sep 17 00:00:00 2001 From: phact Date: Tue, 7 Oct 2025 14:20:13 -0400 Subject: [PATCH 46/58] fix token gen --- Makefile | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/Makefile b/Makefile index e7a29d2f..4eb6293b 100644 --- a/Makefile +++ b/Makefile @@ -209,19 +209,18 @@ test-ci: echo "Docling-serve started at $$DOCLING_ENDPOINT"; \ echo "Waiting for backend OIDC endpoint..."; \ for i in $$(seq 1 60); do \ - curl -s http://localhost:8000/.well-known/openid-configuration >/dev/null 2>&1 && break || sleep 2; \ + docker exec openrag-backend curl -s http://localhost:8000/.well-known/openid-configuration >/dev/null 2>&1 && break || sleep 2; \ done; \ + echo "Generating test JWT token..."; \ + TEST_TOKEN=$$(uv run python -c "from src.session_manager import SessionManager, AnonymousUser; sm = SessionManager('test'); print(sm.create_jwt_token(AnonymousUser()))"); \ echo "Waiting for OpenSearch with JWT auth to work..."; \ JWT_AUTH_READY=false; \ for i in $$(seq 1 60); do \ if curl -k -s https://localhost:9200 -u admin:$${OPENSEARCH_PASSWORD} >/dev/null 2>&1; then \ - TOKEN=$$(curl -s http://localhost:8000/auth/me | grep -o '"token":"[^"]*"' | cut -d'"' -f4 || echo ""); \ - if [ -n "$$TOKEN" ]; then \ - if curl -k -s -H "Authorization: Bearer $$TOKEN" https://localhost:9200/documents/_search -d '{"query":{"match_all":{}}}' 2>&1 | grep -v "Unauthorized" >/dev/null; then \ - echo "โœ“ OpenSearch JWT auth working after $$((i*2)) seconds"; \ - JWT_AUTH_READY=true; \ - break; \ - fi; \ + if curl -k -s -H "Authorization: Bearer $$TEST_TOKEN" https://localhost:9200/documents/_search -d '{"query":{"match_all":{}}}' 2>&1 | grep -v "Unauthorized" >/dev/null; then \ + echo "โœ“ OpenSearch JWT auth working after $$((i*2)) seconds"; \ + JWT_AUTH_READY=true; \ + break; \ fi; \ fi; \ sleep 2; \ From 54ca7a52a725b39d313841a4622fcbf26df0d97b Mon Sep 17 00:00:00 2001 From: phact Date: Tue, 7 Oct 2025 14:36:43 -0400 Subject: [PATCH 47/58] more logging --- Makefile | 7 +++++++ tests/conftest.py | 4 +++- tests/integration/test_api_endpoints.py | 25 +++++++++++++++++++++++++ 3 files changed, 35 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4eb6293b..ecb8f10f 100644 --- a/Makefile +++ b/Makefile @@ -211,8 +211,15 @@ test-ci: for i in $$(seq 1 60); do \ docker exec openrag-backend curl -s http://localhost:8000/.well-known/openid-configuration >/dev/null 2>&1 && break || sleep 2; \ done; \ + echo "Checking key files..."; \ + ls -la keys/; \ + echo "Public key fingerprint (host):"; \ + ssh-keygen -l -f keys/public_key.pem 2>/dev/null || openssl rsa -pubin -in keys/public_key.pem -text -noout | head -5; \ + echo "Public key fingerprint (container):"; \ + docker exec openrag-backend sh -c "ls -la /app/keys/ && openssl rsa -pubin -in /app/keys/public_key.pem -text -noout | head -5"; \ echo "Generating test JWT token..."; \ TEST_TOKEN=$$(uv run python -c "from src.session_manager import SessionManager, AnonymousUser; sm = SessionManager('test'); print(sm.create_jwt_token(AnonymousUser()))"); \ + echo "Test token (first 100 chars): $${TEST_TOKEN:0:100}..."; \ echo "Waiting for OpenSearch with JWT auth to work..."; \ JWT_AUTH_READY=false; \ for i in $$(seq 1 60); do \ diff --git a/tests/conftest.py b/tests/conftest.py index 27a6f750..7c2ffc1d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -45,7 +45,9 @@ def session_manager(): """Session manager for testing.""" # Generate RSA keys before creating SessionManager generate_jwt_keys() - return SessionManager("test-secret-key") + sm = SessionManager("test-secret-key") + print(f"[DEBUG] SessionManager created with keys: private={sm.private_key_path}, public={sm.public_key_path}") + return sm @pytest.fixture diff --git a/tests/integration/test_api_endpoints.py b/tests/integration/test_api_endpoints.py index 1d325a1b..caf7afff 100644 --- a/tests/integration/test_api_endpoints.py +++ b/tests/integration/test_api_endpoints.py @@ -13,11 +13,32 @@ async def wait_for_service_ready(client: httpx.AsyncClient, timeout_s: float = 3 - GET /auth/me should return 200 immediately (confirms app is up). - POST /search with query "*" avoids embeddings and checks OpenSearch/index readiness. """ + # First test OpenSearch JWT directly + from src.session_manager import SessionManager, AnonymousUser + import os + sm = SessionManager("test") + test_token = sm.create_jwt_token(AnonymousUser()) + print(f"[DEBUG] Generated test JWT token (first 50 chars): {test_token[:50]}...") + print(f"[DEBUG] Using key paths: private={sm.private_key_path}, public={sm.public_key_path}") + + # Test OpenSearch JWT auth directly + opensearch_url = f"https://{os.getenv('OPENSEARCH_HOST', 'localhost')}:{os.getenv('OPENSEARCH_PORT', '9200')}" + async with httpx.AsyncClient(verify=False) as os_client: + r_os = await os_client.post( + f"{opensearch_url}/documents/_search", + headers={"Authorization": f"Bearer {test_token}"}, + json={"query": {"match_all": {}}, "size": 0} + ) + print(f"[DEBUG] Direct OpenSearch JWT test: status={r_os.status_code}, body={r_os.text[:300]}") + if r_os.status_code == 401: + print(f"[DEBUG] OpenSearch rejected JWT! This means OIDC config is not working.") + deadline = asyncio.get_event_loop().time() + timeout_s last_err = None while asyncio.get_event_loop().time() < deadline: try: r1 = await client.get("/auth/me") + print(f"[DEBUG] /auth/me status={r1.status_code}, body={r1.text[:200]}") if r1.status_code in (401, 403): raise AssertionError(f"/auth/me returned {r1.status_code}: {r1.text}") if r1.status_code != 200: @@ -25,15 +46,19 @@ async def wait_for_service_ready(client: httpx.AsyncClient, timeout_s: float = 3 continue # match_all readiness probe; no embeddings r2 = await client.post("/search", json={"query": "*", "limit": 0}) + print(f"[DEBUG] /search status={r2.status_code}, body={r2.text[:200]}") if r2.status_code in (401, 403): + print(f"[DEBUG] Search failed with auth error. Response: {r2.text}") raise AssertionError(f"/search returned {r2.status_code}: {r2.text}") if r2.status_code == 200: + print("[DEBUG] Service ready!") return last_err = r2.text except AssertionError: raise except Exception as e: last_err = str(e) + print(f"[DEBUG] Exception during readiness check: {e}") await asyncio.sleep(0.5) raise AssertionError(f"Service not ready in time: {last_err}") From 767cb9e434818c42f55175e996208d7a98b600de Mon Sep 17 00:00:00 2001 From: phact Date: Tue, 7 Oct 2025 14:40:58 -0400 Subject: [PATCH 48/58] logging --- Makefile | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index ecb8f10f..a0fffd36 100644 --- a/Makefile +++ b/Makefile @@ -213,13 +213,12 @@ test-ci: done; \ echo "Checking key files..."; \ ls -la keys/; \ - echo "Public key fingerprint (host):"; \ - ssh-keygen -l -f keys/public_key.pem 2>/dev/null || openssl rsa -pubin -in keys/public_key.pem -text -noout | head -5; \ - echo "Public key fingerprint (container):"; \ - docker exec openrag-backend sh -c "ls -la /app/keys/ && openssl rsa -pubin -in /app/keys/public_key.pem -text -noout | head -5"; \ + echo "Public key content:"; \ + cat keys/public_key.pem; \ + echo "Private key content (first 5 lines):"; \ + head -5 keys/private_key.pem; \ echo "Generating test JWT token..."; \ TEST_TOKEN=$$(uv run python -c "from src.session_manager import SessionManager, AnonymousUser; sm = SessionManager('test'); print(sm.create_jwt_token(AnonymousUser()))"); \ - echo "Test token (first 100 chars): $${TEST_TOKEN:0:100}..."; \ echo "Waiting for OpenSearch with JWT auth to work..."; \ JWT_AUTH_READY=false; \ for i in $$(seq 1 60); do \ From e6caea0052e246fb7d1c283129b4e042f5e93fa0 Mon Sep 17 00:00:00 2001 From: phact Date: Tue, 7 Oct 2025 14:53:42 -0400 Subject: [PATCH 49/58] log hashes --- Makefile | 8 ++++---- tests/integration/test_api_endpoints.py | 14 +++++++++++--- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index a0fffd36..7d24ae2e 100644 --- a/Makefile +++ b/Makefile @@ -213,12 +213,12 @@ test-ci: done; \ echo "Checking key files..."; \ ls -la keys/; \ - echo "Public key content:"; \ - cat keys/public_key.pem; \ - echo "Private key content (first 5 lines):"; \ - head -5 keys/private_key.pem; \ + echo "Public key hash:"; \ + sha256sum keys/public_key.pem | cut -d' ' -f1 | cut -c1-16; \ echo "Generating test JWT token..."; \ TEST_TOKEN=$$(uv run python -c "from src.session_manager import SessionManager, AnonymousUser; sm = SessionManager('test'); print(sm.create_jwt_token(AnonymousUser()))"); \ + echo "Token hash:"; \ + echo "$$TEST_TOKEN" | sha256sum | cut -d' ' -f1 | cut -c1-16; \ echo "Waiting for OpenSearch with JWT auth to work..."; \ JWT_AUTH_READY=false; \ for i in $$(seq 1 60); do \ diff --git a/tests/integration/test_api_endpoints.py b/tests/integration/test_api_endpoints.py index caf7afff..4593df12 100644 --- a/tests/integration/test_api_endpoints.py +++ b/tests/integration/test_api_endpoints.py @@ -16,22 +16,30 @@ async def wait_for_service_ready(client: httpx.AsyncClient, timeout_s: float = 3 # First test OpenSearch JWT directly from src.session_manager import SessionManager, AnonymousUser import os + import hashlib sm = SessionManager("test") test_token = sm.create_jwt_token(AnonymousUser()) - print(f"[DEBUG] Generated test JWT token (first 50 chars): {test_token[:50]}...") + token_hash = hashlib.sha256(test_token.encode()).hexdigest()[:16] + print(f"[DEBUG] Generated test JWT token hash: {token_hash}") print(f"[DEBUG] Using key paths: private={sm.private_key_path}, public={sm.public_key_path}") + with open(sm.public_key_path, 'rb') as f: + pub_key_hash = hashlib.sha256(f.read()).hexdigest()[:16] + print(f"[DEBUG] Public key hash: {pub_key_hash}") # Test OpenSearch JWT auth directly opensearch_url = f"https://{os.getenv('OPENSEARCH_HOST', 'localhost')}:{os.getenv('OPENSEARCH_PORT', '9200')}" + print(f"[DEBUG] Testing JWT auth directly against: {opensearch_url}/documents/_search") async with httpx.AsyncClient(verify=False) as os_client: r_os = await os_client.post( f"{opensearch_url}/documents/_search", headers={"Authorization": f"Bearer {test_token}"}, json={"query": {"match_all": {}}, "size": 0} ) - print(f"[DEBUG] Direct OpenSearch JWT test: status={r_os.status_code}, body={r_os.text[:300]}") + print(f"[DEBUG] Direct OpenSearch JWT test: status={r_os.status_code}, body={r_os.text[:500]}") if r_os.status_code == 401: - print(f"[DEBUG] OpenSearch rejected JWT! This means OIDC config is not working.") + print(f"[DEBUG] โŒ OpenSearch rejected JWT! OIDC config not working.") + else: + print(f"[DEBUG] โœ“ OpenSearch accepted JWT!") deadline = asyncio.get_event_loop().time() + timeout_s last_err = None From 8e976290e1e880c9461e20e998dafe68e0248715 Mon Sep 17 00:00:00 2001 From: phact Date: Tue, 7 Oct 2025 15:07:29 -0400 Subject: [PATCH 50/58] claims --- tests/integration/test_api_endpoints.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/integration/test_api_endpoints.py b/tests/integration/test_api_endpoints.py index 4593df12..869928fe 100644 --- a/tests/integration/test_api_endpoints.py +++ b/tests/integration/test_api_endpoints.py @@ -17,6 +17,7 @@ async def wait_for_service_ready(client: httpx.AsyncClient, timeout_s: float = 3 from src.session_manager import SessionManager, AnonymousUser import os import hashlib + import jwt as jwt_lib sm = SessionManager("test") test_token = sm.create_jwt_token(AnonymousUser()) token_hash = hashlib.sha256(test_token.encode()).hexdigest()[:16] @@ -25,6 +26,9 @@ async def wait_for_service_ready(client: httpx.AsyncClient, timeout_s: float = 3 with open(sm.public_key_path, 'rb') as f: pub_key_hash = hashlib.sha256(f.read()).hexdigest()[:16] print(f"[DEBUG] Public key hash: {pub_key_hash}") + # Decode token to see claims + decoded = jwt_lib.decode(test_token, options={"verify_signature": False}) + print(f"[DEBUG] JWT claims: iss={decoded.get('iss')}, sub={decoded.get('sub')}, aud={decoded.get('aud')}, roles={decoded.get('roles')}") # Test OpenSearch JWT auth directly opensearch_url = f"https://{os.getenv('OPENSEARCH_HOST', 'localhost')}:{os.getenv('OPENSEARCH_PORT', '9200')}" From 9b917e6a239effa724b6856cd3daf8fe1b800b79 Mon Sep 17 00:00:00 2001 From: phact Date: Tue, 7 Oct 2025 15:07:32 -0400 Subject: [PATCH 51/58] claims --- Makefile | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 7d24ae2e..4091a5c4 100644 --- a/Makefile +++ b/Makefile @@ -213,12 +213,16 @@ test-ci: done; \ echo "Checking key files..."; \ ls -la keys/; \ - echo "Public key hash:"; \ + echo "Public key hash (host):"; \ sha256sum keys/public_key.pem | cut -d' ' -f1 | cut -c1-16; \ + echo "Public key hash (container):"; \ + docker exec openrag-backend sha256sum /app/keys/public_key.pem | cut -d' ' -f1 | cut -c1-16; \ echo "Generating test JWT token..."; \ TEST_TOKEN=$$(uv run python -c "from src.session_manager import SessionManager, AnonymousUser; sm = SessionManager('test'); print(sm.create_jwt_token(AnonymousUser()))"); \ - echo "Token hash:"; \ + echo "Token hash (host):"; \ echo "$$TEST_TOKEN" | sha256sum | cut -d' ' -f1 | cut -c1-16; \ + echo "Decoding JWT claims (host):"; \ + uv run python -c "import jwt, sys; sys.stdin.read(); tok='$$TEST_TOKEN'; print('iss:', jwt.decode(tok, options={'verify_signature': False}).get('iss')); print('aud:', jwt.decode(tok, options={'verify_signature': False}).get('aud')); print('roles:', jwt.decode(tok, options={'verify_signature': False}).get('roles'))"; \ echo "Waiting for OpenSearch with JWT auth to work..."; \ JWT_AUTH_READY=false; \ for i in $$(seq 1 60); do \ From 400cdc5059810d4335b2dee113e8e7a1c2d630de Mon Sep 17 00:00:00 2001 From: phact Date: Tue, 7 Oct 2025 15:08:08 -0400 Subject: [PATCH 52/58] claims --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4091a5c4..36ce54e1 100644 --- a/Makefile +++ b/Makefile @@ -222,7 +222,7 @@ test-ci: echo "Token hash (host):"; \ echo "$$TEST_TOKEN" | sha256sum | cut -d' ' -f1 | cut -c1-16; \ echo "Decoding JWT claims (host):"; \ - uv run python -c "import jwt, sys; sys.stdin.read(); tok='$$TEST_TOKEN'; print('iss:', jwt.decode(tok, options={'verify_signature': False}).get('iss')); print('aud:', jwt.decode(tok, options={'verify_signature': False}).get('aud')); print('roles:', jwt.decode(tok, options={'verify_signature': False}).get('roles'))"; \ + echo "$$TEST_TOKEN" | uv run python -c "import jwt, sys; tok=sys.stdin.read().strip(); claims=jwt.decode(tok, options={'verify_signature': False}); print('iss:', claims.get('iss'), 'aud:', claims.get('aud'), 'roles:', claims.get('roles'))"; \ echo "Waiting for OpenSearch with JWT auth to work..."; \ JWT_AUTH_READY=false; \ for i in $$(seq 1 60); do \ From 71b470c10749a6ce5795a3d2a02441f96b2af104 Mon Sep 17 00:00:00 2001 From: phact Date: Tue, 7 Oct 2025 15:14:50 -0400 Subject: [PATCH 53/58] content type/ --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 36ce54e1..203ab17b 100644 --- a/Makefile +++ b/Makefile @@ -227,7 +227,7 @@ test-ci: JWT_AUTH_READY=false; \ for i in $$(seq 1 60); do \ if curl -k -s https://localhost:9200 -u admin:$${OPENSEARCH_PASSWORD} >/dev/null 2>&1; then \ - if curl -k -s -H "Authorization: Bearer $$TEST_TOKEN" https://localhost:9200/documents/_search -d '{"query":{"match_all":{}}}' 2>&1 | grep -v "Unauthorized" >/dev/null; then \ + if curl -k -s -H "Authorization: Bearer $$TEST_TOKEN" -H "Content-Type: application/json" https://localhost:9200/documents/_search -d '{"query":{"match_all":{}}}' 2>&1 | grep -v "Unauthorized" >/dev/null; then \ echo "โœ“ OpenSearch JWT auth working after $$((i*2)) seconds"; \ JWT_AUTH_READY=true; \ break; \ From ccf54d08fb6873615206412219ae2e5bc24bfcaf Mon Sep 17 00:00:00 2001 From: phact Date: Tue, 7 Oct 2025 15:26:07 -0400 Subject: [PATCH 54/58] more verbose --- Makefile | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 203ab17b..9ef0da3d 100644 --- a/Makefile +++ b/Makefile @@ -211,6 +211,10 @@ test-ci: for i in $$(seq 1 60); do \ docker exec openrag-backend curl -s http://localhost:8000/.well-known/openid-configuration >/dev/null 2>&1 && break || sleep 2; \ done; \ + echo "Checking if OpenSearch security config was applied..."; \ + docker exec os ls -la /usr/share/opensearch/setup-security.sh 2>/dev/null || echo "setup-security.sh not found in container"; \ + echo "Checking OpenSearch security config:"; \ + docker exec os curl -k -u admin:$${OPENSEARCH_PASSWORD} https://localhost:9200/_opendistro/_security/api/securityconfig 2>/dev/null | head -50; \ echo "Checking key files..."; \ ls -la keys/; \ echo "Public key hash (host):"; \ @@ -227,8 +231,11 @@ test-ci: JWT_AUTH_READY=false; \ for i in $$(seq 1 60); do \ if curl -k -s https://localhost:9200 -u admin:$${OPENSEARCH_PASSWORD} >/dev/null 2>&1; then \ - if curl -k -s -H "Authorization: Bearer $$TEST_TOKEN" -H "Content-Type: application/json" https://localhost:9200/documents/_search -d '{"query":{"match_all":{}}}' 2>&1 | grep -v "Unauthorized" >/dev/null; then \ - echo "โœ“ OpenSearch JWT auth working after $$((i*2)) seconds"; \ + RESPONSE=$$(curl -k -s -H "Authorization: Bearer $$TEST_TOKEN" https://localhost:9200/documents/_search -d '{"query":{"match_all":{}}}' 2>&1); \ + echo "Attempt $$i response: $$RESPONSE"; \ + if echo "$$RESPONSE" | grep -v "Unauthorized" >/dev/null; then \ + echo "โœ“ OpenSearch JWT check passed after $$((i*2)) seconds"; \ + echo "Full response: $$RESPONSE"; \ JWT_AUTH_READY=true; \ break; \ fi; \ @@ -252,6 +259,9 @@ test-ci: echo "------------------------------------------------------------------------"; \ echo ""; \ exit 1; \ + else \ + echo "Dumping OpenSearch logs to verify OIDC is working:"; \ + docker logs os 2>&1 | grep -E "OIDC|openid|JWT|authenticat" | tail -20; \ fi; \ echo "Waiting for Langflow..."; \ for i in $$(seq 1 60); do \ From 67b40324b801bfac3250796b044884ae6dc6cf0c Mon Sep 17 00:00:00 2001 From: phact Date: Tue, 7 Oct 2025 15:39:28 -0400 Subject: [PATCH 55/58] http status code --- Makefile | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/Makefile b/Makefile index 9ef0da3d..1921bb69 100644 --- a/Makefile +++ b/Makefile @@ -211,10 +211,14 @@ test-ci: for i in $$(seq 1 60); do \ docker exec openrag-backend curl -s http://localhost:8000/.well-known/openid-configuration >/dev/null 2>&1 && break || sleep 2; \ done; \ - echo "Checking if OpenSearch security config was applied..."; \ - docker exec os ls -la /usr/share/opensearch/setup-security.sh 2>/dev/null || echo "setup-security.sh not found in container"; \ - echo "Checking OpenSearch security config:"; \ - docker exec os curl -k -u admin:$${OPENSEARCH_PASSWORD} https://localhost:9200/_opendistro/_security/api/securityconfig 2>/dev/null | head -50; \ + echo "Waiting for OpenSearch security config to be fully applied..."; \ + for i in $$(seq 1 60); do \ + if docker logs os 2>&1 | grep -q "Security configuration applied successfully"; then \ + echo "โœ“ Security configuration applied"; \ + break; \ + fi; \ + sleep 2; \ + done; \ echo "Checking key files..."; \ ls -la keys/; \ echo "Public key hash (host):"; \ @@ -231,11 +235,12 @@ test-ci: JWT_AUTH_READY=false; \ for i in $$(seq 1 60); do \ if curl -k -s https://localhost:9200 -u admin:$${OPENSEARCH_PASSWORD} >/dev/null 2>&1; then \ - RESPONSE=$$(curl -k -s -H "Authorization: Bearer $$TEST_TOKEN" https://localhost:9200/documents/_search -d '{"query":{"match_all":{}}}' 2>&1); \ - echo "Attempt $$i response: $$RESPONSE"; \ - if echo "$$RESPONSE" | grep -v "Unauthorized" >/dev/null; then \ - echo "โœ“ OpenSearch JWT check passed after $$((i*2)) seconds"; \ - echo "Full response: $$RESPONSE"; \ + HTTP_CODE=$$(curl -k -s -w "%{http_code}" -o /tmp/os_response.txt -H "Authorization: Bearer $$TEST_TOKEN" -H "Content-Type: application/json" https://localhost:9200/documents/_search -d '{"query":{"match_all":{}}}' 2>&1); \ + RESPONSE=$$(cat /tmp/os_response.txt); \ + echo "Attempt $$i: HTTP $$HTTP_CODE"; \ + echo "Response: $$RESPONSE"; \ + if [ "$$HTTP_CODE" = "200" ]; then \ + echo "โœ“ OpenSearch JWT auth working after $$((i*2)) seconds"; \ JWT_AUTH_READY=true; \ break; \ fi; \ From e8403cce6d7b232ba81f4c67828045b4937aaf39 Mon Sep 17 00:00:00 2001 From: phact Date: Tue, 7 Oct 2025 15:43:08 -0400 Subject: [PATCH 56/58] check security config --- Makefile | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Makefile b/Makefile index 1921bb69..086ae90a 100644 --- a/Makefile +++ b/Makefile @@ -219,6 +219,17 @@ test-ci: fi; \ sleep 2; \ done; \ + echo "Verifying OIDC authenticator is active in OpenSearch..."; \ + AUTHC_CONFIG=$$(curl -k -s -u admin:$${OPENSEARCH_PASSWORD} https://localhost:9200/_opendistro/_security/api/securityconfig 2>/dev/null); \ + if echo "$$AUTHC_CONFIG" | grep -q "openid_auth_domain"; then \ + echo "โœ“ OIDC authenticator configured"; \ + echo "$$AUTHC_CONFIG" | grep -A 5 "openid_auth_domain"; \ + else \ + echo "โœ— OIDC authenticator NOT found in security config!"; \ + echo "Security config:"; \ + echo "$$AUTHC_CONFIG" | head -50; \ + exit 1; \ + fi; \ echo "Checking key files..."; \ ls -la keys/; \ echo "Public key hash (host):"; \ From 49f615cb5063c65a7920f41672c9b9d30db2732f Mon Sep 17 00:00:00 2001 From: phact Date: Tue, 7 Oct 2025 15:54:23 -0400 Subject: [PATCH 57/58] docker compose pull --- Makefile | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Makefile b/Makefile index 086ae90a..b29dc47a 100644 --- a/Makefile +++ b/Makefile @@ -202,6 +202,10 @@ test-ci: chmod 600 keys/private_key.pem 2>/dev/null || true; \ chmod 644 keys/public_key.pem 2>/dev/null || true; \ fi; \ + echo "Cleaning up old containers and volumes..."; \ + docker compose -f docker-compose-cpu.yml down -v 2>/dev/null || true; \ + echo "Pulling latest images..."; \ + docker compose -f docker-compose-cpu.yml pull; \ echo "Starting infra (OpenSearch + Dashboards + Langflow) with CPU containers"; \ docker compose -f docker-compose-cpu.yml up -d opensearch dashboards langflow; \ echo "Starting docling-serve..."; \ @@ -230,6 +234,9 @@ test-ci: echo "$$AUTHC_CONFIG" | head -50; \ exit 1; \ fi; \ + echo "Checking if OpenSearch can reach backend OIDC endpoint..."; \ + docker exec os curl -s http://openrag-backend:8000/.well-known/openid-configuration | head -c 200; \ + echo ""; \ echo "Checking key files..."; \ ls -la keys/; \ echo "Public key hash (host):"; \ From 6abce7e4d7e79db0cf8e39454645d17da5a3d49b Mon Sep 17 00:00:00 2001 From: phact Date: Wed, 8 Oct 2025 09:41:32 -0400 Subject: [PATCH 58/58] post test jwt diag --- Makefile | 67 ++++++++++++-------------------------------------------- 1 file changed, 14 insertions(+), 53 deletions(-) diff --git a/Makefile b/Makefile index b29dc47a..b30f77fc 100644 --- a/Makefile +++ b/Makefile @@ -234,58 +234,6 @@ test-ci: echo "$$AUTHC_CONFIG" | head -50; \ exit 1; \ fi; \ - echo "Checking if OpenSearch can reach backend OIDC endpoint..."; \ - docker exec os curl -s http://openrag-backend:8000/.well-known/openid-configuration | head -c 200; \ - echo ""; \ - echo "Checking key files..."; \ - ls -la keys/; \ - echo "Public key hash (host):"; \ - sha256sum keys/public_key.pem | cut -d' ' -f1 | cut -c1-16; \ - echo "Public key hash (container):"; \ - docker exec openrag-backend sha256sum /app/keys/public_key.pem | cut -d' ' -f1 | cut -c1-16; \ - echo "Generating test JWT token..."; \ - TEST_TOKEN=$$(uv run python -c "from src.session_manager import SessionManager, AnonymousUser; sm = SessionManager('test'); print(sm.create_jwt_token(AnonymousUser()))"); \ - echo "Token hash (host):"; \ - echo "$$TEST_TOKEN" | sha256sum | cut -d' ' -f1 | cut -c1-16; \ - echo "Decoding JWT claims (host):"; \ - echo "$$TEST_TOKEN" | uv run python -c "import jwt, sys; tok=sys.stdin.read().strip(); claims=jwt.decode(tok, options={'verify_signature': False}); print('iss:', claims.get('iss'), 'aud:', claims.get('aud'), 'roles:', claims.get('roles'))"; \ - echo "Waiting for OpenSearch with JWT auth to work..."; \ - JWT_AUTH_READY=false; \ - for i in $$(seq 1 60); do \ - if curl -k -s https://localhost:9200 -u admin:$${OPENSEARCH_PASSWORD} >/dev/null 2>&1; then \ - HTTP_CODE=$$(curl -k -s -w "%{http_code}" -o /tmp/os_response.txt -H "Authorization: Bearer $$TEST_TOKEN" -H "Content-Type: application/json" https://localhost:9200/documents/_search -d '{"query":{"match_all":{}}}' 2>&1); \ - RESPONSE=$$(cat /tmp/os_response.txt); \ - echo "Attempt $$i: HTTP $$HTTP_CODE"; \ - echo "Response: $$RESPONSE"; \ - if [ "$$HTTP_CODE" = "200" ]; then \ - echo "โœ“ OpenSearch JWT auth working after $$((i*2)) seconds"; \ - JWT_AUTH_READY=true; \ - break; \ - fi; \ - fi; \ - sleep 2; \ - done; \ - if [ "$$JWT_AUTH_READY" = "false" ]; then \ - echo ""; \ - echo "========================================================================"; \ - echo "โœ— ERROR: OpenSearch JWT authentication failed to work after 120 seconds!"; \ - echo "========================================================================"; \ - echo ""; \ - echo "Dumping OpenSearch container logs:"; \ - echo "------------------------------------------------------------------------"; \ - docker logs os --tail 100; \ - echo "------------------------------------------------------------------------"; \ - echo ""; \ - echo "Dumping backend container logs:"; \ - echo "------------------------------------------------------------------------"; \ - docker logs openrag-backend --tail 50; \ - echo "------------------------------------------------------------------------"; \ - echo ""; \ - exit 1; \ - else \ - echo "Dumping OpenSearch logs to verify OIDC is working:"; \ - docker logs os 2>&1 | grep -E "OIDC|openid|JWT|authenticat" | tail -20; \ - fi; \ echo "Waiting for Langflow..."; \ for i in $$(seq 1 60); do \ curl -s http://localhost:7860/ >/dev/null 2>&1 && break || sleep 2; \ @@ -302,9 +250,22 @@ test-ci: OPENSEARCH_USERNAME=admin OPENSEARCH_PASSWORD=$${OPENSEARCH_PASSWORD} \ DISABLE_STARTUP_INGEST=$${DISABLE_STARTUP_INGEST:-true} \ uv run pytest tests/integration -vv -s -o log_cli=true --log-cli-level=DEBUG; \ + TEST_RESULT=$$?; \ + echo ""; \ + echo "=== Post-test JWT diagnostics ==="; \ + echo "Generating test JWT token..."; \ + TEST_TOKEN=$$(uv run python -c "from src.session_manager import SessionManager, AnonymousUser; sm = SessionManager('test'); print(sm.create_jwt_token(AnonymousUser()))" 2>/dev/null || echo ""); \ + if [ -n "$$TEST_TOKEN" ]; then \ + echo "Testing JWT against OpenSearch..."; \ + HTTP_CODE=$$(curl -k -s -w "%{http_code}" -o /tmp/os_diag.txt -H "Authorization: Bearer $$TEST_TOKEN" -H "Content-Type: application/json" https://localhost:9200/documents/_search -d '{"query":{"match_all":{}}}' 2>&1); \ + echo "HTTP $$HTTP_CODE: $$(cat /tmp/os_diag.txt | head -c 150)"; \ + fi; \ + echo "================================="; \ + echo ""; \ echo "Tearing down infra"; \ uv run python scripts/docling_ctl.py stop || true; \ - docker compose down -v || true + docker compose down -v || true; \ + exit $$TEST_RESULT lint: @echo "๐Ÿ” Running linting checks..."