dockerize app

2025-07-11 02:02:18 -04:00 · 2025-07-11 02:02:18 -04:00 · 84c070181c
commit 84c070181c
parent 39efea8612
6 changed files with 131 additions and 9 deletions
--- a/Dockerfile.app
+++ b/Dockerfile.app
@ -0,0 +1,58 @@
 FROM node:18-slim
 # Single image that runs both the Python backend and the Next.js frontend.
 # The Node base image is extended with the system packages the backend and
 # the startup script need: python3, pip, and curl (curl is used below both to
 # fetch the uv installer and to probe the backend for readiness).
 RUN apt-get update && apt-get install -y \
    python3 \
    python3-pip \
    curl \
    && rm -rf /var/lib/apt/lists/*
 # Install uv; the PATH entry below makes the installed binary reachable
 RUN curl -LsSf https://astral.sh/uv/install.sh | sh
 ENV PATH="/root/.local/bin:$PATH"
 # Set working directory
 WORKDIR /app
 # Copy only the dependency manifests first so the dependency layer is reused
 # from cache until pyproject.toml or uv.lock change
 COPY pyproject.toml uv.lock ./
 RUN uv sync
 # Copy Python source
 COPY src/ ./src/
 # Copy the sample document and warm-up script, run the warm-up once at build
 # time (it exists to trigger docling's model downloads), then delete both so
 # they do not remain in /app
 COPY documents/2506.08231v1.pdf ./
 COPY warm_up_docling.py ./
 RUN uv run python warm_up_docling.py && rm warm_up_docling.py 2506.08231v1.pdf
 # Copy frontend dependency manifests before the sources (same caching idea)
 COPY frontend/package*.json ./frontend/
 RUN cd frontend && npm install
 # Copy frontend source
 COPY frontend/ ./frontend/
 # Build frontend
 RUN cd frontend && npm run build
 # Create startup script (/app/start.sh). It starts the backend in the
 # background, polls POST /search on localhost:8000 until it succeeds, then
 # starts the frontend and waits on the background jobs.
 # While polling, the script checks that the backend process is still alive
 # (kill -0) and exits non-zero if it has died; without that check a crashed
 # backend would leave the container looping on "not ready" forever.
 # NOTE(review): the probe still loops indefinitely if the backend stays up but
 # /search keeps returning an error status - consider a dedicated health route.
 RUN echo '#!/bin/bash\n\
 set -e\n\
 echo "Starting Python backend..."\n\
 uv run python src/app.py &\n\
 BACKEND_PID=$!\n\
 echo "Waiting for backend to be ready..."\n\
 until curl -f http://localhost:8000/search -X POST -H "Content-Type: application/json" -d "{\"query\":\"test\"}" > /dev/null 2>&1; do\n\
  if ! kill -0 "$BACKEND_PID" 2>/dev/null; then\n\
   echo "Backend process exited before becoming ready" >&2\n\
   exit 1\n\
  fi\n\
  echo "Backend not ready yet, waiting..."\n\
  sleep 2\n\
 done\n\
 echo "Backend is ready! Starting Frontend..."\n\
 cd frontend && npm start &\n\
 wait' > /app/start.sh && chmod +x /app/start.sh
 # Expose only frontend port (the backend on 8000 is reached inside the container)
 EXPOSE 3000
 # Start both services
 CMD ["/app/start.sh"]
--- a/docker-compose.yml
+++ b/docker-compose.yml
@ -23,3 +23,25 @@ services:
    ports:
      - "5601:5601"
  # Application container built from Dockerfile.app (Python backend plus
  # frontend in one image).
  gendb:
    build:
      context: .
      dockerfile: Dockerfile.app
    container_name: gendb-app
    # Short-form depends_on orders container start only; it does not wait for
    # OpenSearch to accept requests. src/app.py retries the connection itself
    # (wait_for_opensearch) before creating the index.
    depends_on:
      - opensearch
    # Connection settings read by src/app.py through os.getenv.
    # NOTE(review): the OpenSearch password is committed in plain text here;
    # consider sourcing it from the host environment like OPENAI_API_KEY.
    environment:
      - OPENSEARCH_HOST=opensearch
      - OPENSEARCH_PORT=9200
      - OPENSEARCH_USERNAME=admin
      - OPENSEARCH_PASSWORD=OSisgendb1!
      # Interpolated by Compose from the host environment / .env file
      - OPENAI_API_KEY=${OPENAI_API_KEY}
    # Only the frontend port is published to the host
    ports:
      - "3000:3000"
    # Bind mounts that overlay host files onto the paths baked into the image.
    # /app/documents is also the default folder path pre-filled in the admin page.
    volumes:
      - ./src:/app/src
      - ./frontend/src:/app/frontend/src
      - ./pyproject.toml:/app/pyproject.toml
      - ./uv.lock:/app/uv.lock
      - ./documents:/app/documents
--- a/frontend/next.config.ts
+++ b/frontend/next.config.ts
@ -9,6 +9,10 @@ const nextConfig: NextConfig = {
      },
    ];
  },
  // Increase timeout for API routes
  experimental: {
    proxyTimeout: 300000, // 5 minutes
  },
 };
 export default nextConfig;
--- a/frontend/src/app/admin/page.tsx
+++ b/frontend/src/app/admin/page.tsx
@ -11,7 +11,7 @@ export default function AdminPage() {
  const [fileUploadLoading, setFileUploadLoading] = useState(false)
  const [pathUploadLoading, setPathUploadLoading] = useState(false)
  const [selectedFile, setSelectedFile] = useState<File | null>(null)
-  const [folderPath, setFolderPath] = useState("")
+  const [folderPath, setFolderPath] = useState("/app/documents/")
  const [uploadStatus, setUploadStatus] = useState<string>("")
  const handleFileUpload = async (e: React.FormEvent) => {
--- a/src/app.py
+++ b/src/app.py
@ -8,6 +8,7 @@ os.environ['USE_CPU_ONLY'] = 'true'
 import hashlib
 import tempfile
 import asyncio
 import time
 from starlette.applications import Starlette
 from starlette.requests     import Request
@ -25,14 +26,19 @@ from openai import OpenAI
 # Module-level docling converter, created once at import time.
 converter = DocumentConverter()  # basic converter; tweak via PipelineOptions if you need OCR, etc.
 # Initialize Async OpenSearch (adjust hosts/auth as needed)
 # Connection settings are read from the environment so the same module works
 # both on a developer machine and inside docker-compose; each default equals
 # the value that was previously hard-coded in the client constructor.
 # NOTE(review): the fallback password is a committed credential - confirm this
 # is acceptable outside local development.
 opensearch_host = os.getenv("OPENSEARCH_HOST", "localhost")
 # int() raises ValueError at import time if OPENSEARCH_PORT is set but not numeric
 opensearch_port = int(os.getenv("OPENSEARCH_PORT", "9200"))
 opensearch_username = os.getenv("OPENSEARCH_USERNAME", "admin")
 opensearch_password = os.getenv("OPENSEARCH_PASSWORD", "OSisgendb1!")
 es = AsyncOpenSearch(
-    hosts=[{"host": "localhost", "port": 9200}],
+    hosts=[{"host": opensearch_host, "port": opensearch_port}],
    connection_class=AIOHttpConnection,
    scheme="https",
    use_ssl=True,
    verify_certs=False,
    ssl_assert_fingerprint=None,
-    http_auth=("admin","OSisgendb1!"),
+    http_auth=(opensearch_username, opensearch_password),
    http_compress=True,
 )
@ -71,7 +77,26 @@ index_body = {
 client = patch_openai_with_mcp(OpenAI())  # Get the patched client back
 async def wait_for_opensearch(max_retries: int = 30, retry_delay: float = 2):
    """Wait until OpenSearch answers an info request, retrying on failure.

    Calls ``es.info()`` up to ``max_retries`` times, sleeping ``retry_delay``
    seconds between failed attempts (no sleep after the final one).

    Args:
        max_retries: Maximum number of connection attempts. Defaults to 30.
        retry_delay: Seconds to wait between attempts. Defaults to 2.

    Raises:
        RuntimeError: If no attempt succeeded. The last connection error is
            attached as ``__cause__`` so the root failure stays visible.
    """
    last_error = None
    for attempt in range(1, max_retries + 1):
        try:
            # Keep the try body to the single call that is expected to fail
            # while the cluster is still starting.
            await es.info()
        except Exception as e:
            # Broad on purpose: any client/transport error means "not ready".
            last_error = e
            print(f"Attempt {attempt}/{max_retries}: OpenSearch not ready yet ({e})")
            if attempt < max_retries:
                await asyncio.sleep(retry_delay)
        else:
            print("OpenSearch is ready!")
            return
    # Every attempt failed (or none were allowed): surface one clear error.
    raise RuntimeError("OpenSearch failed to become ready") from last_error
 async def init_index():
    await wait_for_opensearch()
    if not await es.indices.exists(index=INDEX_NAME):
        await es.indices.create(index=INDEX_NAME, body=index_body)
        print(f"Created index '{INDEX_NAME}'")
@ -133,9 +158,9 @@ async def process_file_common(file_path: str, file_hash: str = None):
                sha256.update(chunk)
        file_hash = sha256.hexdigest()
-    #exists = await es.exists(index=INDEX_NAME, id=file_hash)
+    exists = await es.exists(index=INDEX_NAME, id=file_hash)
-    #if exists:
+    if exists:
-    #    return {"status": "unchanged", "id": file_hash}
+        return {"status": "unchanged", "id": file_hash}
    # convert and extract
    # TODO: Check if docling can handle in-memory bytes instead of file path
@ -186,9 +211,9 @@ async def upload(request: Request):
        tmp.flush()
        file_hash = sha256.hexdigest()
-        #exists = await es.exists(index=INDEX_NAME, id=file_hash)
+        exists = await es.exists(index=INDEX_NAME, id=file_hash)
-        #if exists:
+        if exists:
-        #    return JSONResponse({"status": "unchanged", "id": file_hash})
+            return JSONResponse({"status": "unchanged", "id": file_hash})
        result = await process_file_common(tmp.name, file_hash)
        return JSONResponse(result)
--- a/warm_up_docling.py
+++ b/warm_up_docling.py
@ -0,0 +1,13 @@
 from docling.document_converter import DocumentConverter
 # Build-time helper: convert one sample PDF so that docling fetches whatever
 # models it needs while the image is being built rather than at first request.
 print('Warming up docling models...')
 try:
    # Sample document that the Dockerfile copies next to this script
    sample_pdf = "/app/2506.08231v1.pdf"
    print(f'Using {sample_pdf} to warm up docling...')
    warmup_converter = DocumentConverter()
    warmup_converter.convert(sample_pdf)
    print('Docling models warmed up successfully')
 except Exception as warmup_error:
    # Best effort by design: a failed conversion is tolerated because the
    # goal is only to trigger the model downloads, so report and carry on.
    print(f'Docling warm-up completed with: {warmup_error}')