dockerize app
This commit is contained in:
parent
39efea8612
commit
84c070181c
6 changed files with 131 additions and 9 deletions
58
Dockerfile.app
Normal file
58
Dockerfile.app
Normal file
|
|
@ -0,0 +1,58 @@
|
||||||
|
# Single image that runs both the Python backend and the Next.js frontend.
FROM node:18-slim

# Install Python, pip, and curl. curl is needed to fetch the uv installer
# below and by the startup script's backend readiness probe.
RUN apt-get update && apt-get install -y \
    python3 \
    python3-pip \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Install uv (the installer places the binary under /root/.local/bin)
RUN curl -LsSf https://astral.sh/uv/install.sh | sh
ENV PATH="/root/.local/bin:$PATH"

# Set working directory
WORKDIR /app

# Copy Python dependency manifests first so the dependency layer is cached
# independently of source changes
COPY pyproject.toml uv.lock ./
RUN uv sync

# Copy Python source
COPY src/ ./src/

# Copy sample document and warmup script, run the warm-up once at build time,
# then remove both so they do not ship in the final filesystem
COPY documents/2506.08231v1.pdf ./
COPY warm_up_docling.py ./
RUN uv run python warm_up_docling.py && rm warm_up_docling.py 2506.08231v1.pdf

# Copy frontend dependency manifests
COPY frontend/package*.json ./frontend/
RUN cd frontend && npm install

# Copy frontend source
COPY frontend/ ./frontend/

# Build frontend
RUN cd frontend && npm run build

# Create startup script.
# printf '%s\n' writes one script line per argument, so the result does not
# depend on whether /bin/sh's echo expands "\n" escapes.
# The readiness loop aborts if the backend process has already exited instead
# of polling forever, and "wait -n" makes the container stop as soon as either
# service terminates rather than lingering with only one of them alive.
RUN printf '%s\n' \
    '#!/bin/bash' \
    'set -e' \
    'echo "Starting Python backend..."' \
    'uv run python src/app.py &' \
    'BACKEND_PID=$!' \
    'echo "Waiting for backend to be ready..."' \
    'until curl -f http://localhost:8000/search -X POST -H "Content-Type: application/json" -d "{\"query\":\"test\"}" > /dev/null 2>&1; do' \
    '  if ! kill -0 "$BACKEND_PID" 2>/dev/null; then' \
    '    echo "Backend exited before becoming ready" >&2' \
    '    exit 1' \
    '  fi' \
    '  echo "Backend not ready yet, waiting..."' \
    '  sleep 2' \
    'done' \
    'echo "Backend is ready! Starting Frontend..."' \
    'cd frontend && npm start &' \
    'wait -n' \
    > /app/start.sh && chmod +x /app/start.sh

# Expose only frontend port
EXPOSE 3000

# Start both services
CMD ["/app/start.sh"]
|
||||||
|
|
@ -23,3 +23,25 @@ services:
|
||||||
ports:
|
ports:
|
||||||
- "5601:5601"
|
- "5601:5601"
|
||||||
|
|
||||||
|
gendb:
  # Application container built from Dockerfile.app; only port 3000 is published.
  build:
    context: .
    dockerfile: Dockerfile.app
  container_name: gendb-app
  # Controls start order only; the backend itself retries until OpenSearch answers.
  depends_on:
    - opensearch
  environment:
    - OPENSEARCH_HOST=opensearch
    - OPENSEARCH_PORT=9200
    # Credentials can be overridden from the host environment / .env file;
    # the defaults match the previously hard-coded values.
    - OPENSEARCH_USERNAME=${OPENSEARCH_USERNAME:-admin}
    - OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD:-OSisgendb1!}
    - OPENAI_API_KEY=${OPENAI_API_KEY}
  ports:
    - "3000:3000"
  volumes:
    - ./src:/app/src
    - ./frontend/src:/app/frontend/src
    - ./pyproject.toml:/app/pyproject.toml
    - ./uv.lock:/app/uv.lock
    - ./documents:/app/documents
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,10 @@ const nextConfig: NextConfig = {
|
||||||
},
|
},
|
||||||
];
|
];
|
||||||
},
|
},
|
||||||
|
// Increase timeout for API routes
|
||||||
|
experimental: {
|
||||||
|
proxyTimeout: 300000, // 5 minutes
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
export default nextConfig;
|
export default nextConfig;
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,7 @@ export default function AdminPage() {
|
||||||
const [fileUploadLoading, setFileUploadLoading] = useState(false)
|
const [fileUploadLoading, setFileUploadLoading] = useState(false)
|
||||||
const [pathUploadLoading, setPathUploadLoading] = useState(false)
|
const [pathUploadLoading, setPathUploadLoading] = useState(false)
|
||||||
const [selectedFile, setSelectedFile] = useState<File | null>(null)
|
const [selectedFile, setSelectedFile] = useState<File | null>(null)
|
||||||
const [folderPath, setFolderPath] = useState("")
|
const [folderPath, setFolderPath] = useState("/app/documents/")
|
||||||
const [uploadStatus, setUploadStatus] = useState<string>("")
|
const [uploadStatus, setUploadStatus] = useState<string>("")
|
||||||
|
|
||||||
const handleFileUpload = async (e: React.FormEvent) => {
|
const handleFileUpload = async (e: React.FormEvent) => {
|
||||||
|
|
|
||||||
41
src/app.py
41
src/app.py
|
|
@ -8,6 +8,7 @@ os.environ['USE_CPU_ONLY'] = 'true'
|
||||||
import hashlib
|
import hashlib
|
||||||
import tempfile
|
import tempfile
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import time
|
||||||
|
|
||||||
from starlette.applications import Starlette
|
from starlette.applications import Starlette
|
||||||
from starlette.requests import Request
|
from starlette.requests import Request
|
||||||
|
|
@ -25,14 +26,19 @@ from openai import OpenAI
|
||||||
converter = DocumentConverter() # basic converter; tweak via PipelineOptions if you need OCR, etc.
|
converter = DocumentConverter() # basic converter; tweak via PipelineOptions if you need OCR, etc.
|
||||||
|
|
||||||
# Initialize Async OpenSearch (adjust hosts/auth as needed)
|
# Initialize Async OpenSearch (adjust hosts/auth as needed)
|
||||||
|
opensearch_host = os.getenv("OPENSEARCH_HOST", "localhost")
|
||||||
|
opensearch_port = int(os.getenv("OPENSEARCH_PORT", "9200"))
|
||||||
|
opensearch_username = os.getenv("OPENSEARCH_USERNAME", "admin")
|
||||||
|
opensearch_password = os.getenv("OPENSEARCH_PASSWORD", "OSisgendb1!")
|
||||||
|
|
||||||
es = AsyncOpenSearch(
|
es = AsyncOpenSearch(
|
||||||
hosts=[{"host": "localhost", "port": 9200}],
|
hosts=[{"host": opensearch_host, "port": opensearch_port}],
|
||||||
connection_class=AIOHttpConnection,
|
connection_class=AIOHttpConnection,
|
||||||
scheme="https",
|
scheme="https",
|
||||||
use_ssl=True,
|
use_ssl=True,
|
||||||
verify_certs=False,
|
verify_certs=False,
|
||||||
ssl_assert_fingerprint=None,
|
ssl_assert_fingerprint=None,
|
||||||
http_auth=("admin","OSisgendb1!"),
|
http_auth=(opensearch_username, opensearch_password),
|
||||||
http_compress=True,
|
http_compress=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -71,7 +77,26 @@ index_body = {
|
||||||
|
|
||||||
client = patch_openai_with_mcp(OpenAI()) # Get the patched client back
|
client = patch_openai_with_mcp(OpenAI()) # Get the patched client back
|
||||||
|
|
||||||
|
async def wait_for_opensearch(max_retries: int = 30, retry_delay: float = 2):
    """Wait until OpenSearch answers an ``info`` request, retrying on failure.

    Calls ``es.info()`` up to ``max_retries`` times and sleeps ``retry_delay``
    seconds after every failed attempt except the last one.

    Args:
        max_retries: Maximum number of attempts before giving up.
        retry_delay: Seconds to sleep between failed attempts.

    Raises:
        Exception: If no attempt succeeded (including ``max_retries <= 0``).
            The error from the last attempt, if any, is attached as the cause.
    """
    last_error = None

    for attempt in range(1, max_retries + 1):
        try:
            await es.info()
        except Exception as e:
            # Any client error is treated as "not ready yet" and retried.
            last_error = e
            print(f"Attempt {attempt}/{max_retries}: OpenSearch not ready yet ({e})")
            if attempt < max_retries:
                await asyncio.sleep(retry_delay)
        else:
            print("OpenSearch is ready!")
            return

    # Reached only when every attempt failed or no attempt was allowed.
    raise Exception("OpenSearch failed to become ready") from last_error
|
||||||
|
|
||||||
async def init_index():
|
async def init_index():
|
||||||
|
await wait_for_opensearch()
|
||||||
|
|
||||||
if not await es.indices.exists(index=INDEX_NAME):
|
if not await es.indices.exists(index=INDEX_NAME):
|
||||||
await es.indices.create(index=INDEX_NAME, body=index_body)
|
await es.indices.create(index=INDEX_NAME, body=index_body)
|
||||||
print(f"Created index '{INDEX_NAME}'")
|
print(f"Created index '{INDEX_NAME}'")
|
||||||
|
|
@ -133,9 +158,9 @@ async def process_file_common(file_path: str, file_hash: str = None):
|
||||||
sha256.update(chunk)
|
sha256.update(chunk)
|
||||||
file_hash = sha256.hexdigest()
|
file_hash = sha256.hexdigest()
|
||||||
|
|
||||||
#exists = await es.exists(index=INDEX_NAME, id=file_hash)
|
exists = await es.exists(index=INDEX_NAME, id=file_hash)
|
||||||
#if exists:
|
if exists:
|
||||||
# return {"status": "unchanged", "id": file_hash}
|
return {"status": "unchanged", "id": file_hash}
|
||||||
|
|
||||||
# convert and extract
|
# convert and extract
|
||||||
# TODO: Check if docling can handle in-memory bytes instead of file path
|
# TODO: Check if docling can handle in-memory bytes instead of file path
|
||||||
|
|
@ -186,9 +211,9 @@ async def upload(request: Request):
|
||||||
tmp.flush()
|
tmp.flush()
|
||||||
|
|
||||||
file_hash = sha256.hexdigest()
|
file_hash = sha256.hexdigest()
|
||||||
#exists = await es.exists(index=INDEX_NAME, id=file_hash)
|
exists = await es.exists(index=INDEX_NAME, id=file_hash)
|
||||||
#if exists:
|
if exists:
|
||||||
# return JSONResponse({"status": "unchanged", "id": file_hash})
|
return JSONResponse({"status": "unchanged", "id": file_hash})
|
||||||
|
|
||||||
result = await process_file_common(tmp.name, file_hash)
|
result = await process_file_common(tmp.name, file_hash)
|
||||||
return JSONResponse(result)
|
return JSONResponse(result)
|
||||||
|
|
|
||||||
13
warm_up_docling.py
Normal file
13
warm_up_docling.py
Normal file
|
|
@ -0,0 +1,13 @@
|
||||||
|
"""Build-time warm-up: run one docling conversion on a sample document.

Running a conversion once is intended to trigger docling's model downloads
so they are already present when the application starts.
"""
import sys

from docling.document_converter import DocumentConverter

# Location of the sample document inside the image.
DEFAULT_TEST_FILE = "/app/2506.08231v1.pdf"


def main(test_file: str = DEFAULT_TEST_FILE) -> None:
    """Convert ``test_file`` once, reporting but not propagating failures.

    Args:
        test_file: Path of the document used for the warm-up conversion.
    """
    print('Warming up docling models...')

    try:
        # Use the sample document to warm up docling
        print(f'Using {test_file} to warm up docling...')
        DocumentConverter().convert(test_file)
        print('Docling models warmed up successfully')
    except Exception as e:
        # Deliberately best-effort: a failed conversion is tolerated because
        # the goal is only to trigger the model downloads.
        print(f'Docling warm-up completed with: {e}')


if __name__ == "__main__":
    # An optional first argument overrides the sample document path.
    main(sys.argv[1] if len(sys.argv) > 1 else DEFAULT_TEST_FILE)
|
||||||
Loading…
Add table
Reference in a new issue