diff --git a/src/main.py b/src/main.py
index 5221d432..8f714be9 100644
--- a/src/main.py
+++ b/src/main.py
@@ -85,6 +85,9 @@ logger.info(
     cuda_version=torch.version.cuda,
 )
 
+# File names (compared by basename, so matched in any subdirectory) to skip during startup ingestion
+EXCLUDED_INGESTION_FILES = {"warmup_ocr.pdf"}
+
 
 async def wait_for_opensearch():
     """Wait for OpenSearch to be ready with retries"""
@@ -312,11 +315,12 @@ async def ingest_default_documents_when_ready(services):
             )
             return
 
-        # Collect files recursively
+        # Collect files recursively, skipping any file name listed in EXCLUDED_INGESTION_FILES
         file_paths = [
             os.path.join(root, fn)
             for root, _, files in os.walk(base_dir)
             for fn in files
+            if fn not in EXCLUDED_INGESTION_FILES
         ]
 
         if not file_paths:
diff --git a/tests/integration/test_startup_ingest.py b/tests/integration/test_startup_ingest.py
index b2243b33..44d1e8b2 100644
--- a/tests/integration/test_startup_ingest.py
+++ b/tests/integration/test_startup_ingest.py
@@ -5,6 +5,9 @@ from pathlib import Path
 import httpx
 import pytest
 
+# File names excluded from ingestion; keep in sync with EXCLUDED_INGESTION_FILES in src/main.py
+EXCLUDED_INGESTION_FILES = {"warmup_ocr.pdf"}
+
 
 async def wait_for_ready(client: httpx.AsyncClient, timeout_s: float = 30.0):
     deadline = asyncio.get_event_loop().time() + timeout_s
@@ -29,7 +32,7 @@ def count_files_in_documents() -> int:
     base_dir = Path(os.getcwd()) / "documents"
     if not base_dir.is_dir():
         return 0
-    return sum(1 for _ in base_dir.rglob("*") if _.is_file())
+    return sum(1 for _ in base_dir.rglob("*") if _.is_file() and _.name not in EXCLUDED_INGESTION_FILES)
 
 
 @pytest.mark.parametrize("disable_langflow_ingest", [True, False])