Use shared OpenSearch query builders in document endpoints and task processors

This commit is contained in:
Lucas Oliveira 2025-10-06 09:45:33 -03:00
parent eef9e380dc
commit 95bdc59af0
2 changed files with 10 additions and 42 deletions

View file

@ -23,20 +23,9 @@ async def check_filename_exists(request: Request, document_service, session_mana
)
# Search for any document with this exact filename
# Match on both filename.keyword (exact value) and the regular, analyzed filename field
search_body = {
"query": {
"bool": {
"should": [
{"term": {"filename.keyword": filename}},
{"term": {"filename": filename}}
],
"minimum_should_match": 1
}
},
"size": 1,
"_source": ["filename"]
}
from utils.opensearch_queries import build_filename_search_body
search_body = build_filename_search_body(filename, size=1, source=["filename"])
logger.debug(f"Checking filename existence: {filename}")
@ -83,18 +72,9 @@ async def delete_documents_by_filename(request: Request, document_service, sessi
)
# Delete by query to remove all chunks of this document
# Use both .keyword and regular field to ensure we catch all variations
delete_query = {
"query": {
"bool": {
"should": [
{"term": {"filename.keyword": filename}},
{"term": {"filename": filename}}
],
"minimum_should_match": 1
}
}
}
from utils.opensearch_queries import build_filename_delete_body
delete_query = build_filename_delete_body(filename)
logger.debug(f"Deleting documents with filename: {filename}")

View file

@ -65,6 +65,7 @@ class TaskProcessor:
Returns True if any chunks with this filename exist.
"""
from config.settings import INDEX_NAME
from utils.opensearch_queries import build_filename_search_body
import asyncio
max_retries = 3
@ -73,15 +74,7 @@ class TaskProcessor:
for attempt in range(max_retries):
try:
# Search for any document with this exact filename
search_body = {
"query": {
"term": {
"filename.keyword": filename
}
},
"size": 1,
"_source": False
}
search_body = build_filename_search_body(filename, size=1, source=False)
response = await opensearch_client.search(
index=INDEX_NAME,
@ -126,16 +119,11 @@ class TaskProcessor:
Delete all chunks of a document with the given filename from OpenSearch.
"""
from config.settings import INDEX_NAME
from utils.opensearch_queries import build_filename_delete_body
try:
# Delete all documents with this filename
delete_body = {
"query": {
"term": {
"filename.keyword": filename
}
}
}
delete_body = build_filename_delete_body(filename)
response = await opensearch_client.delete_by_query(
index=INDEX_NAME,