dockerize app

phact 2025-07-11 02:02:18 -04:00
parent 39efea8612
commit 84c070181c
6 changed files with 131 additions and 9 deletions

Dockerfile.app (new file, 58 lines)
@@ -0,0 +1,58 @@
FROM node:18-slim

# Install Python, uv, and curl
RUN apt-get update && apt-get install -y \
    python3 \
    python3-pip \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Install uv
RUN curl -LsSf https://astral.sh/uv/install.sh | sh
ENV PATH="/root/.local/bin:$PATH"

# Set working directory
WORKDIR /app

# Copy Python dependencies
COPY pyproject.toml uv.lock ./
RUN uv sync

# Copy Python source
COPY src/ ./src/

# Copy sample document and warmup script
COPY documents/2506.08231v1.pdf ./
COPY warm_up_docling.py ./
RUN uv run python warm_up_docling.py && rm warm_up_docling.py 2506.08231v1.pdf

# Copy frontend dependencies
COPY frontend/package*.json ./frontend/
RUN cd frontend && npm install

# Copy frontend source
COPY frontend/ ./frontend/

# Build frontend
RUN cd frontend && npm run build

# Create startup script
RUN echo '#!/bin/bash\n\
set -e\n\
echo "Starting Python backend..."\n\
uv run python src/app.py &\n\
BACKEND_PID=$!\n\
echo "Waiting for backend to be ready..."\n\
until curl -f http://localhost:8000/search -X POST -H "Content-Type: application/json" -d "{\"query\":\"test\"}" > /dev/null 2>&1; do\n\
echo "Backend not ready yet, waiting..."\n\
sleep 2\n\
done\n\
echo "Backend is ready! Starting Frontend..."\n\
cd frontend && npm start &\n\
wait' > /app/start.sh && chmod +x /app/start.sh

# Expose only frontend port
EXPOSE 3000

# Start both services
CMD ["/app/start.sh"]
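The readiness probe buried in the echo-built start.sh is awkward to iterate on. The same check, sketched in Python as a hypothetical standalone helper (not part of this commit; it assumes the backend listens on localhost:8000 and that /search accepts the JSON body the script above posts):

# wait_for_backend.py - a sketch of the readiness probe start.sh performs
# with curl; assumes POST /search on port 8000 accepts {"query": "test"}.
import json
import time
import urllib.error
import urllib.request

def wait_for_backend(url: str = "http://localhost:8000/search",
                     delay: float = 2.0, max_attempts: int = 150) -> None:
    payload = json.dumps({"query": "test"}).encode()
    req = urllib.request.Request(
        url, data=payload, headers={"Content-Type": "application/json"}
    )
    for attempt in range(1, max_attempts + 1):
        try:
            # urlopen raises on connection errors and non-2xx responses,
            # so falling through means the backend answered successfully
            with urllib.request.urlopen(req, timeout=5):
                print("Backend is ready!")
                return
        except (urllib.error.URLError, OSError):
            print(f"Backend not ready yet (attempt {attempt}), waiting...")
            time.sleep(delay)
    raise RuntimeError("backend never became ready")

if __name__ == "__main__":
    wait_for_backend()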

@@ -23,3 +23,25 @@ services:
    ports:
      - "5601:5601"
+  gendb:
+    build:
+      context: .
+      dockerfile: Dockerfile.app
+    container_name: gendb-app
+    depends_on:
+      - opensearch
+    environment:
+      - OPENSEARCH_HOST=opensearch
+      - OPENSEARCH_PORT=9200
+      - OPENSEARCH_USERNAME=admin
+      - OPENSEARCH_PASSWORD=OSisgendb1!
+      - OPENAI_API_KEY=${OPENAI_API_KEY}
+    ports:
+      - "3000:3000"
+    volumes:
+      - ./src:/app/src
+      - ./frontend/src:/app/frontend/src
+      - ./pyproject.toml:/app/pyproject.toml
+      - ./uv.lock:/app/uv.lock
+      - ./documents:/app/documents
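Note that depends_on only orders container startup; it does not wait for OpenSearch to actually accept connections. That gap is covered by the wait_for_opensearch retry loop added to the backend further down in this commit.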

@@ -9,6 +9,10 @@ const nextConfig: NextConfig = {
      },
    ];
  },
+  // Increase timeout for API routes
+  experimental: {
+    proxyTimeout: 300000, // 5 minutes
+  },
};

export default nextConfig;
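The raised proxyTimeout presumably keeps Next.js rewrites to the Python backend from timing out during long docling conversions, which can run for minutes on large PDFs.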

@@ -11,7 +11,7 @@ export default function AdminPage() {
  const [fileUploadLoading, setFileUploadLoading] = useState(false)
  const [pathUploadLoading, setPathUploadLoading] = useState(false)
  const [selectedFile, setSelectedFile] = useState<File | null>(null)
-  const [folderPath, setFolderPath] = useState("")
+  const [folderPath, setFolderPath] = useState("/app/documents/")
  const [uploadStatus, setUploadStatus] = useState<string>("")

  const handleFileUpload = async (e: React.FormEvent) => {
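The new default lines up with the ./documents bind mount (/app/documents) in docker-compose above, so path-based uploads work inside the container without typing a path.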

@@ -8,6 +8,7 @@ os.environ['USE_CPU_ONLY'] = 'true'
import hashlib
import tempfile
import asyncio
+import time
from starlette.applications import Starlette
from starlette.requests import Request
@@ -25,14 +26,19 @@ from openai import OpenAI
converter = DocumentConverter()  # basic converter; tweak via PipelineOptions if you need OCR, etc.

# Initialize Async OpenSearch (adjust hosts/auth as needed)
+opensearch_host = os.getenv("OPENSEARCH_HOST", "localhost")
+opensearch_port = int(os.getenv("OPENSEARCH_PORT", "9200"))
+opensearch_username = os.getenv("OPENSEARCH_USERNAME", "admin")
+opensearch_password = os.getenv("OPENSEARCH_PASSWORD", "OSisgendb1!")
es = AsyncOpenSearch(
-    hosts=[{"host": "localhost", "port": 9200}],
+    hosts=[{"host": opensearch_host, "port": opensearch_port}],
    connection_class=AIOHttpConnection,
    scheme="https",
    use_ssl=True,
    verify_certs=False,
    ssl_assert_fingerprint=None,
-    http_auth=("admin","OSisgendb1!"),
+    http_auth=(opensearch_username, opensearch_password),
    http_compress=True,
)
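A standalone connectivity check is handy for verifying this env-driven wiring before the full app boots. A minimal sketch (not part of this commit) mirroring the client configuration above:

# check_opensearch.py - a sketch of an env-driven connectivity check,
# using the same opensearch-py async client settings as app.py.
import asyncio
import os

from opensearchpy import AIOHttpConnection, AsyncOpenSearch

async def main() -> None:
    es = AsyncOpenSearch(
        hosts=[{
            "host": os.getenv("OPENSEARCH_HOST", "localhost"),
            "port": int(os.getenv("OPENSEARCH_PORT", "9200")),
        }],
        connection_class=AIOHttpConnection,
        use_ssl=True,
        verify_certs=False,  # dev-only: the compose OpenSearch uses a self-signed cert
        http_auth=(
            os.getenv("OPENSEARCH_USERNAME", "admin"),
            os.getenv("OPENSEARCH_PASSWORD", "OSisgendb1!"),
        ),
    )
    try:
        info = await es.info()
        print("OpenSearch is up, version:", info["version"]["number"])
    finally:
        await es.close()

asyncio.run(main())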
@@ -71,7 +77,26 @@ index_body = {
client = patch_openai_with_mcp(OpenAI())  # Get the patched client back

+async def wait_for_opensearch():
+    """Wait for OpenSearch to be ready with retries"""
+    max_retries = 30
+    retry_delay = 2
+    for attempt in range(max_retries):
+        try:
+            await es.info()
+            print("OpenSearch is ready!")
+            return
+        except Exception as e:
+            print(f"Attempt {attempt + 1}/{max_retries}: OpenSearch not ready yet ({e})")
+            if attempt < max_retries - 1:
+                await asyncio.sleep(retry_delay)
+            else:
+                raise Exception("OpenSearch failed to become ready")
+
async def init_index():
+    await wait_for_opensearch()
    if not await es.indices.exists(index=INDEX_NAME):
        await es.indices.create(index=INDEX_NAME, body=index_body)
        print(f"Created index '{INDEX_NAME}'")
@@ -133,9 +158,9 @@ async def process_file_common(file_path: str, file_hash: str = None):
                sha256.update(chunk)
        file_hash = sha256.hexdigest()

-    #exists = await es.exists(index=INDEX_NAME, id=file_hash)
-    #if exists:
-    #    return {"status": "unchanged", "id": file_hash}
+    exists = await es.exists(index=INDEX_NAME, id=file_hash)
+    if exists:
+        return {"status": "unchanged", "id": file_hash}

    # convert and extract
    # TODO: Check if docling can handle in-memory bytes instead of file path
@@ -186,9 +211,9 @@ async def upload(request: Request):
            tmp.flush()
            file_hash = sha256.hexdigest()

-            #exists = await es.exists(index=INDEX_NAME, id=file_hash)
-            #if exists:
-            #    return JSONResponse({"status": "unchanged", "id": file_hash})
+            exists = await es.exists(index=INDEX_NAME, id=file_hash)
+            if exists:
+                return JSONResponse({"status": "unchanged", "id": file_hash})

            result = await process_file_common(tmp.name, file_hash)
            return JSONResponse(result)
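Both the path-based and upload endpoints now dedupe on a SHA-256 of the file bytes, reusing the digest as the OpenSearch document id. A minimal sketch of that hashing step (hypothetical helper name; chunked reads as in the handlers above):

# a sketch of the content hash used as the OpenSearch document id;
# hashing in fixed-size chunks keeps memory flat for large PDFs.
import hashlib

def file_sha256(path: str, chunk_size: int = 1 << 20) -> str:
    sha256 = hashlib.sha256()
    with open(path, "rb") as f:
        while chunk := f.read(chunk_size):
            sha256.update(chunk)
    return sha256.hexdigest()

# Identical bytes always map to the same id, so re-uploading an
# unchanged document short-circuits with {"status": "unchanged"}.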

warm_up_docling.py (new file, 13 lines)

@@ -0,0 +1,13 @@
from docling.document_converter import DocumentConverter

print('Warming up docling models...')

try:
    # Use the sample document to warm up docling
    test_file = "/app/2506.08231v1.pdf"
    print(f'Using {test_file} to warm up docling...')
    DocumentConverter().convert(test_file)
    print('Docling models warmed up successfully')
except Exception as e:
    print(f'Docling warm-up completed with: {e}')
    # This is expected - we just want to trigger the model downloads