dockerize app
This commit is contained in:
parent 39efea8612
commit 84c070181c
6 changed files with 131 additions and 9 deletions
Dockerfile.app (new file, 58 lines)
@@ -0,0 +1,58 @@
FROM node:18-slim

# Install Python, pip, and curl
RUN apt-get update && apt-get install -y \
    python3 \
    python3-pip \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Install uv
RUN curl -LsSf https://astral.sh/uv/install.sh | sh
ENV PATH="/root/.local/bin:$PATH"

# Set working directory
WORKDIR /app

# Copy Python dependencies
COPY pyproject.toml uv.lock ./
RUN uv sync

# Copy Python source
COPY src/ ./src/

# Copy sample document and warmup script
COPY documents/2506.08231v1.pdf ./
COPY warm_up_docling.py ./
RUN uv run python warm_up_docling.py && rm warm_up_docling.py 2506.08231v1.pdf

# Copy frontend dependencies
COPY frontend/package*.json ./frontend/
RUN cd frontend && npm install

# Copy frontend source
COPY frontend/ ./frontend/

# Build frontend
RUN cd frontend && npm run build

# Create startup script
RUN echo '#!/bin/bash\n\
set -e\n\
echo "Starting Python backend..."\n\
uv run python src/app.py &\n\
BACKEND_PID=$!\n\
echo "Waiting for backend to be ready..."\n\
until curl -f http://localhost:8000/search -X POST -H "Content-Type: application/json" -d "{\"query\":\"test\"}" > /dev/null 2>&1; do\n\
echo "Backend not ready yet, waiting..."\n\
sleep 2\n\
done\n\
echo "Backend is ready! Starting Frontend..."\n\
cd frontend && npm start &\n\
wait' > /app/start.sh && chmod +x /app/start.sh

# Expose only frontend port
EXPOSE 3000

# Start both services
CMD ["/app/start.sh"]
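Aside: the start.sh script created above polls the backend with `curl -f` against POST /search before starting the frontend. A rough Python equivalent of that readiness loop, as a sketch only (the wait_for_backend helper, URL, and retry counts are assumptions mirroring the shell script, not code from this commit):

# Sketch: Python version of the curl readiness loop in start.sh.
import json
import time
import urllib.request

def wait_for_backend(url="http://localhost:8000/search", retries=30, delay=2):
    payload = json.dumps({"query": "test"}).encode()
    req = urllib.request.Request(
        url, data=payload, headers={"Content-Type": "application/json"}
    )
    for attempt in range(1, retries + 1):
        try:
            urllib.request.urlopen(req, timeout=5)  # a successful response means ready
            print("Backend is ready!")
            return
        except Exception:
            print(f"Backend not ready yet ({attempt}/{retries}), waiting...")
            time.sleep(delay)
    raise RuntimeError("Backend failed to become ready")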

docker-compose.yml
@@ -23,3 +23,25 @@ services:
    ports:
      - "5601:5601"

+  gendb:
+    build:
+      context: .
+      dockerfile: Dockerfile.app
+    container_name: gendb-app
+    depends_on:
+      - opensearch
+    environment:
+      - OPENSEARCH_HOST=opensearch
+      - OPENSEARCH_PORT=9200
+      - OPENSEARCH_USERNAME=admin
+      - OPENSEARCH_PASSWORD=OSisgendb1!
+      - OPENAI_API_KEY=${OPENAI_API_KEY}
+    ports:
+      - "3000:3000"
+    volumes:
+      - ./src:/app/src
+      - ./frontend/src:/app/frontend/src
+      - ./pyproject.toml:/app/pyproject.toml
+      - ./uv.lock:/app/uv.lock
+      - ./documents:/app/documents
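Note that the volumes block bind-mounts the host's src/, frontend/src/, pyproject.toml, uv.lock, and documents/ into the container, so source edits on the host are visible in the running container without rebuilding the image, and the OPENSEARCH_* and OPENAI_API_KEY variables feed the environment lookups added in src/app.py below.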

frontend/next.config.ts
@@ -9,6 +9,10 @@ const nextConfig: NextConfig = {
      },
    ];
  },
+  // Increase timeout for API routes
+  experimental: {
+    proxyTimeout: 300000, // 5 minutes
+  },
};

export default nextConfig;
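The proxyTimeout bump to five minutes presumably accommodates long-running uploads: converting a large PDF with docling can take minutes, which would otherwise exceed the Next.js proxy timeout on the API routes that forward to the Python backend.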

@@ -11,7 +11,7 @@ export default function AdminPage() {
  const [fileUploadLoading, setFileUploadLoading] = useState(false)
  const [pathUploadLoading, setPathUploadLoading] = useState(false)
  const [selectedFile, setSelectedFile] = useState<File | null>(null)
-  const [folderPath, setFolderPath] = useState("")
+  const [folderPath, setFolderPath] = useState("/app/documents/")
  const [uploadStatus, setUploadStatus] = useState<string>("")

  const handleFileUpload = async (e: React.FormEvent) => {
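The new default of /app/documents/ matches the ./documents:/app/documents bind mount in docker-compose.yml, so the path pre-filled in the admin UI resolves inside the container.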

src/app.py (41 lines changed)
@@ -8,6 +8,7 @@ os.environ['USE_CPU_ONLY'] = 'true'
import hashlib
import tempfile
+import asyncio
import time

from starlette.applications import Starlette
from starlette.requests import Request

@@ -25,14 +26,19 @@ from openai import OpenAI
converter = DocumentConverter()  # basic converter; tweak via PipelineOptions if you need OCR, etc.

# Initialize Async OpenSearch (adjust hosts/auth as needed)
+opensearch_host = os.getenv("OPENSEARCH_HOST", "localhost")
+opensearch_port = int(os.getenv("OPENSEARCH_PORT", "9200"))
+opensearch_username = os.getenv("OPENSEARCH_USERNAME", "admin")
+opensearch_password = os.getenv("OPENSEARCH_PASSWORD", "OSisgendb1!")
+
es = AsyncOpenSearch(
-    hosts=[{"host": "localhost", "port": 9200}],
+    hosts=[{"host": opensearch_host, "port": opensearch_port}],
    connection_class=AIOHttpConnection,
    scheme="https",
    use_ssl=True,
    verify_certs=False,
    ssl_assert_fingerprint=None,
-    http_auth=("admin","OSisgendb1!"),
+    http_auth=(opensearch_username, opensearch_password),
    http_compress=True,
)

@@ -71,7 +77,26 @@ index_body = {
client = patch_openai_with_mcp(OpenAI())  # Get the patched client back

+async def wait_for_opensearch():
+    """Wait for OpenSearch to be ready with retries"""
+    max_retries = 30
+    retry_delay = 2
+
+    for attempt in range(max_retries):
+        try:
+            await es.info()
+            print("OpenSearch is ready!")
+            return
+        except Exception as e:
+            print(f"Attempt {attempt + 1}/{max_retries}: OpenSearch not ready yet ({e})")
+            if attempt < max_retries - 1:
+                await asyncio.sleep(retry_delay)
+            else:
+                raise Exception("OpenSearch failed to become ready")
+
async def init_index():
+    await wait_for_opensearch()
+
    if not await es.indices.exists(index=INDEX_NAME):
        await es.indices.create(index=INDEX_NAME, body=index_body)
        print(f"Created index '{INDEX_NAME}'")

@@ -133,9 +158,9 @@ async def process_file_common(file_path: str, file_hash: str = None):
            sha256.update(chunk)
    file_hash = sha256.hexdigest()

-    #exists = await es.exists(index=INDEX_NAME, id=file_hash)
-    #if exists:
-    #    return {"status": "unchanged", "id": file_hash}
+    exists = await es.exists(index=INDEX_NAME, id=file_hash)
+    if exists:
+        return {"status": "unchanged", "id": file_hash}

    # convert and extract
    # TODO: Check if docling can handle in-memory bytes instead of file path
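For context, the now-enabled dedup uses the file's SHA-256 digest as the OpenSearch document id, so re-ingesting identical bytes returns "unchanged" without re-converting. A self-contained sketch of that hashing step (the file_sha256 name is made up for illustration):

import hashlib

def file_sha256(path: str, chunk_size: int = 8192) -> str:
    # Stream in chunks so large PDFs never need to fit in memory.
    sha256 = hashlib.sha256()
    with open(path, "rb") as f:
        while chunk := f.read(chunk_size):
            sha256.update(chunk)
    return sha256.hexdigest()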

@@ -186,9 +211,9 @@ async def upload(request: Request):
        tmp.flush()

    file_hash = sha256.hexdigest()
-    #exists = await es.exists(index=INDEX_NAME, id=file_hash)
-    #if exists:
-    #    return JSONResponse({"status": "unchanged", "id": file_hash})
+    exists = await es.exists(index=INDEX_NAME, id=file_hash)
+    if exists:
+        return JSONResponse({"status": "unchanged", "id": file_hash})

    result = await process_file_common(tmp.name, file_hash)
    return JSONResponse(result)

warm_up_docling.py (new file, 13 lines)
@@ -0,0 +1,13 @@
from docling.document_converter import DocumentConverter

print('Warming up docling models...')

try:
    # Use the sample document to warm up docling
    test_file = "/app/2506.08231v1.pdf"
    print(f'Using {test_file} to warm up docling...')
    DocumentConverter().convert(test_file)
    print('Docling models warmed up successfully')
except Exception as e:
    print(f'Docling warm-up completed with: {e}')
    # This is expected - we just want to trigger the model downloads
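Because Dockerfile.app runs this script at build time and then deletes both the script and the sample PDF, the docling model weights are baked into the image layer, and containers can convert documents without downloading models on first request. A quick smoke test for that cache, sketched with an assumed PDF path (any small PDF inside the container would do):

# Sketch only: confirm models were cached at image build time.
from docling.document_converter import DocumentConverter

result = DocumentConverter().convert("/app/documents/sample.pdf")
print(result.document.export_to_markdown()[:200])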