From 95bdc59af0bad389fca636b92f30621cbe2961b3 Mon Sep 17 00:00:00 2001 From: Lucas Oliveira Date: Mon, 6 Oct 2025 09:45:33 -0300 Subject: [PATCH] Use opensearch queries on documents and processors --- src/api/documents.py | 32 ++++++-------------------------- src/models/processors.py | 20 ++++---------------- 2 files changed, 10 insertions(+), 42 deletions(-) diff --git a/src/api/documents.py b/src/api/documents.py index a367c9fe..048f746a 100644 --- a/src/api/documents.py +++ b/src/api/documents.py @@ -23,20 +23,9 @@ async def check_filename_exists(request: Request, document_service, session_mana ) # Search for any document with this exact filename - # Try both .keyword (exact match) and regular field (analyzed match) - search_body = { - "query": { - "bool": { - "should": [ - {"term": {"filename.keyword": filename}}, - {"term": {"filename": filename}} - ], - "minimum_should_match": 1 - } - }, - "size": 1, - "_source": ["filename"] - } + from utils.opensearch_queries import build_filename_search_body + + search_body = build_filename_search_body(filename, size=1, source=["filename"]) logger.debug(f"Checking filename existence: {filename}") @@ -83,18 +72,9 @@ async def delete_documents_by_filename(request: Request, document_service, sessi ) # Delete by query to remove all chunks of this document - # Use both .keyword and regular field to ensure we catch all variations - delete_query = { - "query": { - "bool": { - "should": [ - {"term": {"filename.keyword": filename}}, - {"term": {"filename": filename}} - ], - "minimum_should_match": 1 - } - } - } + from utils.opensearch_queries import build_filename_delete_body + + delete_query = build_filename_delete_body(filename) logger.debug(f"Deleting documents with filename: {filename}") diff --git a/src/models/processors.py b/src/models/processors.py index 6d7b74b4..4a5d96b5 100644 --- a/src/models/processors.py +++ b/src/models/processors.py @@ -65,6 +65,7 @@ class TaskProcessor: Returns True if any chunks with this filename exist. """ from config.settings import INDEX_NAME + from utils.opensearch_queries import build_filename_search_body import asyncio max_retries = 3 @@ -73,15 +74,7 @@ class TaskProcessor: for attempt in range(max_retries): try: # Search for any document with this exact filename - search_body = { - "query": { - "term": { - "filename.keyword": filename - } - }, - "size": 1, - "_source": False - } + search_body = build_filename_search_body(filename, size=1, source=False) response = await opensearch_client.search( index=INDEX_NAME, @@ -126,16 +119,11 @@ class TaskProcessor: Delete all chunks of a document with the given filename from OpenSearch. """ from config.settings import INDEX_NAME + from utils.opensearch_queries import build_filename_delete_body try: # Delete all documents with this filename - delete_body = { - "query": { - "term": { - "filename.keyword": filename - } - } - } + delete_body = build_filename_delete_body(filename) response = await opensearch_client.delete_by_query( index=INDEX_NAME,