fix: Make sure we exclude the warmup file from ingestion
This commit is contained in:
parent
3216d866f7
commit
cfe7f6b581
2 changed files with 9 additions and 2 deletions
|
|
@ -85,6 +85,9 @@ logger.info(
|
|||
cuda_version=torch.version.cuda,
|
||||
)
|
||||
|
||||
# Files to exclude from startup ingestion
|
||||
EXCLUDED_INGESTION_FILES = {"warmup_ocr.pdf"}
|
||||
|
||||
|
||||
async def wait_for_opensearch():
|
||||
"""Wait for OpenSearch to be ready with retries"""
|
||||
|
|
@ -312,11 +315,12 @@ async def ingest_default_documents_when_ready(services):
|
|||
)
|
||||
return
|
||||
|
||||
# Collect files recursively
|
||||
# Collect files recursively, excluding warmup files
|
||||
file_paths = [
|
||||
os.path.join(root, fn)
|
||||
for root, _, files in os.walk(base_dir)
|
||||
for fn in files
|
||||
if fn not in EXCLUDED_INGESTION_FILES
|
||||
]
|
||||
|
||||
if not file_paths:
|
||||
|
|
|
|||
|
|
@ -5,6 +5,9 @@ from pathlib import Path
|
|||
import httpx
|
||||
import pytest
|
||||
|
||||
# Files to exclude from ingestion (should match src/main.py)
|
||||
EXCLUDED_INGESTION_FILES = {"warmup_ocr.pdf"}
|
||||
|
||||
|
||||
async def wait_for_ready(client: httpx.AsyncClient, timeout_s: float = 30.0):
|
||||
deadline = asyncio.get_event_loop().time() + timeout_s
|
||||
|
|
@ -29,7 +32,7 @@ def count_files_in_documents() -> int:
|
|||
base_dir = Path(os.getcwd()) / "documents"
|
||||
if not base_dir.is_dir():
|
||||
return 0
|
||||
return sum(1 for _ in base_dir.rglob("*") if _.is_file())
|
||||
return sum(1 for _ in base_dir.rglob("*") if _.is_file() and _.name not in EXCLUDED_INGESTION_FILES)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("disable_langflow_ingest", [True, False])
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue