fix: Make sure we exclude the warmup file from startup ingestion
This commit is contained in:
parent
3216d866f7
commit
cfe7f6b581
2 changed files with 9 additions and 2 deletions
|
|
@@ -85,6 +85,9 @@ logger.info(
|
||||||
cuda_version=torch.version.cuda,
|
cuda_version=torch.version.cuda,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Files to exclude from startup ingestion
|
||||||
|
EXCLUDED_INGESTION_FILES = {"warmup_ocr.pdf"}
|
||||||
|
|
||||||
|
|
||||||
async def wait_for_opensearch():
|
async def wait_for_opensearch():
|
||||||
"""Wait for OpenSearch to be ready with retries"""
|
"""Wait for OpenSearch to be ready with retries"""
|
||||||
|
|
@@ -312,11 +315,12 @@ async def ingest_default_documents_when_ready(services):
|
||||||
)
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
# Collect files recursively
|
# Collect files recursively, excluding warmup files
|
||||||
file_paths = [
|
file_paths = [
|
||||||
os.path.join(root, fn)
|
os.path.join(root, fn)
|
||||||
for root, _, files in os.walk(base_dir)
|
for root, _, files in os.walk(base_dir)
|
||||||
for fn in files
|
for fn in files
|
||||||
|
if fn not in EXCLUDED_INGESTION_FILES
|
||||||
]
|
]
|
||||||
|
|
||||||
if not file_paths:
|
if not file_paths:
|
||||||
|
|
|
||||||
|
|
@@ -5,6 +5,9 @@ from pathlib import Path
|
||||||
import httpx
|
import httpx
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
# Files to exclude from ingestion (should match src/main.py)
|
||||||
|
EXCLUDED_INGESTION_FILES = {"warmup_ocr.pdf"}
|
||||||
|
|
||||||
|
|
||||||
async def wait_for_ready(client: httpx.AsyncClient, timeout_s: float = 30.0):
|
async def wait_for_ready(client: httpx.AsyncClient, timeout_s: float = 30.0):
|
||||||
deadline = asyncio.get_event_loop().time() + timeout_s
|
deadline = asyncio.get_event_loop().time() + timeout_s
|
||||||
|
|
@@ -29,7 +32,7 @@ def count_files_in_documents() -> int:
|
||||||
base_dir = Path(os.getcwd()) / "documents"
|
base_dir = Path(os.getcwd()) / "documents"
|
||||||
if not base_dir.is_dir():
|
if not base_dir.is_dir():
|
||||||
return 0
|
return 0
|
||||||
return sum(1 for _ in base_dir.rglob("*") if _.is_file())
|
return sum(1 for _ in base_dir.rglob("*") if _.is_file() and _.name not in EXCLUDED_INGESTION_FILES)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("disable_langflow_ingest", [True, False])
|
@pytest.mark.parametrize("disable_langflow_ingest", [True, False])
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue