updated Docker compose
This commit is contained in:
parent
caa6a701d6
commit
68e14bd6f0
4 changed files with 498 additions and 16 deletions
385
DEV-README.md
Normal file
385
DEV-README.md
Normal file
|
|
@ -0,0 +1,385 @@
|
|||
# OpenRAG Development Guide
|
||||
|
||||
A comprehensive guide for setting up and developing OpenRAG locally.
|
||||
|
||||
## Table of Contents
|
||||
|
||||
- [Architecture Overview](#architecture-overview)
|
||||
- [Prerequisites](#prerequisites)
|
||||
- [Environment Setup](#environment-setup)
|
||||
- [Development Methods](#development-methods)
|
||||
- [Local Development (Non-Docker)](#local-development-non-docker)
|
||||
- [Docker Development](#docker-development)
|
||||
- [API Documentation](#api-documentation)
|
||||
- [Troubleshooting](#troubleshooting)
|
||||
- [Contributing](#contributing)
|
||||
|
||||
## Architecture Overview
|
||||
|
||||
OpenRAG consists of four main services:
|
||||
|
||||
1. **Backend** (`src/`) - Python FastAPI/Starlette application with document processing, search, and chat
|
||||
2. **Frontend** (`frontend/`) - Next.js React application
|
||||
3. **OpenSearch** - Document storage and vector search engine
|
||||
4. **Langflow** - AI workflow engine for chat functionality
|
||||
|
||||
### Key Technologies
|
||||
|
||||
- **Backend**: Python 3.13+, Starlette, OpenAI, Docling, OpenSearch
|
||||
- **Frontend**: Next.js 15, React 19, TypeScript, Tailwind CSS
|
||||
- **Dependencies**: UV (Python), npm (Node.js)
|
||||
- **Containerization**: Docker/Podman with Compose
|
||||
|
||||
## Prerequisites
|
||||
|
||||
### System Requirements
|
||||
|
||||
- **Python**: 3.13+ (for local development)
|
||||
- **Node.js**: 18+ (for frontend development)
|
||||
- **Container Runtime**: Docker or Podman with Compose
|
||||
- **Memory**: 8GB+ RAM recommended (especially for GPU workloads)
|
||||
|
||||
### Development Tools
|
||||
|
||||
```bash
|
||||
# Python dependency manager
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
|
||||
# Node.js (via nvm recommended)
|
||||
curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.0/install.sh | bash
|
||||
nvm install 18
|
||||
nvm use 18
|
||||
```
|
||||
|
||||
## Environment Setup
|
||||
|
||||
### 1. Clone and Setup
|
||||
|
||||
```bash
|
||||
git clone <repository-url>
|
||||
cd openrag
|
||||
```
|
||||
|
||||
### 2. Environment Variables
|
||||
|
||||
Create your environment configuration:
|
||||
|
||||
```bash
|
||||
cp .env.example .env
|
||||
```
|
||||
|
||||
Edit `.env` with your configuration:
|
||||
|
||||
```bash
|
||||
# Required
|
||||
OPENSEARCH_PASSWORD=your_secure_password
|
||||
OPENAI_API_KEY=sk-your_openai_api_key
|
||||
|
||||
# Langflow Configuration
|
||||
LANGFLOW_PUBLIC_URL=http://localhost:7860
|
||||
LANGFLOW_SUPERUSER=admin
|
||||
LANGFLOW_SUPERUSER_PASSWORD=your_langflow_password
|
||||
LANGFLOW_SECRET_KEY=your_secret_key_min_32_chars
|
||||
LANGFLOW_AUTO_LOGIN=true
|
||||
LANGFLOW_NEW_USER_IS_ACTIVE=true
|
||||
LANGFLOW_ENABLE_SUPERUSER_CLI=true
|
||||
FLOW_ID=your_flow_id
|
||||
|
||||
# OAuth (Optional - for Google Drive/OneDrive connectors)
|
||||
GOOGLE_OAUTH_CLIENT_ID=your_google_client_id
|
||||
GOOGLE_OAUTH_CLIENT_SECRET=your_google_client_secret
|
||||
MICROSOFT_GRAPH_OAUTH_CLIENT_ID=your_microsoft_client_id
|
||||
MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET=your_microsoft_client_secret
|
||||
|
||||
# Webhooks (Optional)
|
||||
WEBHOOK_BASE_URL=https://your-domain.com
|
||||
|
||||
# AWS S3 (Optional)
|
||||
AWS_ACCESS_KEY_ID=your_aws_key
|
||||
AWS_SECRET_ACCESS_KEY=your_aws_secret
|
||||
```
|
||||
|
||||
## Development Methods
|
||||
|
||||
Choose your preferred development approach:
|
||||
|
||||
## Local Development (Non-Docker)
|
||||
|
||||
Best for rapid development and debugging.
|
||||
|
||||
### Backend Setup
|
||||
|
||||
```bash
|
||||
# Install Python dependencies
|
||||
uv sync
|
||||
|
||||
# Start OpenSearch (required dependency)
|
||||
docker run -d \
|
||||
--name opensearch-dev \
|
||||
-p 9200:9200 \
|
||||
-p 9600:9600 \
|
||||
-e "discovery.type=single-node" \
|
||||
-e "OPENSEARCH_INITIAL_ADMIN_PASSWORD=admin123" \
|
||||
opensearchproject/opensearch:3.0.0
|
||||
|
||||
# Start backend
|
||||
cd src
|
||||
uv run python main.py
|
||||
```
|
||||
|
||||
Backend will be available at: http://localhost:8000
|
||||
|
||||
### Frontend Setup
|
||||
|
||||
```bash
|
||||
# Install Node.js dependencies
|
||||
cd frontend
|
||||
npm install
|
||||
|
||||
# Start development server
|
||||
npm run dev
|
||||
```
|
||||
|
||||
Frontend will be available at: http://localhost:3000
|
||||
|
||||
### Langflow Setup (Optional)
|
||||
|
||||
```bash
|
||||
# Install and run Langflow
|
||||
pip install langflow
|
||||
langflow run --host 0.0.0.0 --port 7860
|
||||
```
|
||||
|
||||
Langflow will be available at: http://localhost:7860
|
||||
|
||||
## Docker Development
|
||||
|
||||
Use this for a production-like environment or when you need all services.
|
||||
|
||||
### Available Compose Files
|
||||
|
||||
- `docker-compose-dev.yml` - Development (builds from source)
|
||||
- `docker-compose.yml` - Production (pre-built images)
|
||||
- `docker-compose-cpu.yml` - CPU-only version
|
||||
|
||||
### Development with Docker
|
||||
|
||||
```bash
|
||||
# Build and start all services
|
||||
docker compose -f docker-compose-dev.yml up --build
|
||||
|
||||
# Or with Podman
|
||||
podman compose -f docker-compose-dev.yml up --build
|
||||
|
||||
# Run in background
|
||||
docker compose -f docker-compose-dev.yml up --build -d
|
||||
|
||||
# View logs
|
||||
docker compose -f docker-compose-dev.yml logs -f
|
||||
|
||||
# Stop services
|
||||
docker compose -f docker-compose-dev.yml down
|
||||
```
|
||||
|
||||
### Service Ports
|
||||
|
||||
- **Frontend**: http://localhost:3000
|
||||
- **Backend**: http://localhost:8000 (internal)
|
||||
- **OpenSearch**: http://localhost:9200
|
||||
- **OpenSearch Dashboards**: http://localhost:5601
|
||||
- **Langflow**: http://localhost:7860
|
||||
|
||||
### Reset Development Environment
|
||||
|
||||
```bash
|
||||
# Complete reset (removes volumes and rebuilds)
|
||||
docker compose -f docker-compose-dev.yml down -v
|
||||
docker compose -f docker-compose-dev.yml up --build --force-recreate --remove-orphans
|
||||
```
|
||||
|
||||
## API Documentation
|
||||
|
||||
### Key Endpoints
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
|----------|--------|-------------|
|
||||
| `/search` | POST | Search documents with filters |
|
||||
| `/upload` | POST | Upload documents |
|
||||
| `/upload_path` | POST | Upload from local path |
|
||||
| `/tasks` | GET | List processing tasks |
|
||||
| `/tasks/{id}` | GET | Get task status |
|
||||
| `/connectors` | GET | List available connectors |
|
||||
| `/auth/me` | GET | Get current user info |
|
||||
| `/knowledge-filter` | POST/GET | Manage knowledge filters |
|
||||
|
||||
### Example API Calls
|
||||
|
||||
```bash
|
||||
# Search all documents
|
||||
curl -X POST http://localhost:8000/search \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"query": "*", "limit": 100}'
|
||||
|
||||
# Upload a document
|
||||
curl -X POST http://localhost:8000/upload \
|
||||
-F "file=@document.pdf"
|
||||
|
||||
# Get task status
|
||||
curl http://localhost:8000/tasks/task_id_here
|
||||
```
|
||||
|
||||
### Frontend API Proxy
|
||||
|
||||
The Next.js frontend proxies API calls through `/api/*` to the backend at `http://openrag-backend:8000` (in Docker) or `http://localhost:8000` (local).
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Common Issues
|
||||
|
||||
#### Docker/Podman Issues
|
||||
|
||||
**Issue**: `docker: command not found`
|
||||
```bash
|
||||
# Install Docker Desktop or use Podman
|
||||
brew install podman podman-desktop
|
||||
podman machine init --memory 8192
|
||||
podman machine start
|
||||
```
|
||||
|
||||
**Issue**: Out of memory during build
|
||||
```bash
|
||||
# For Podman on macOS
|
||||
podman machine stop
|
||||
podman machine rm
|
||||
podman machine init --memory 8192
|
||||
podman machine start
|
||||
```
|
||||
|
||||
#### Backend Issues
|
||||
|
||||
**Issue**: `ModuleNotFoundError` or dependency issues
|
||||
```bash
|
||||
# Ensure you're using the right Python version
|
||||
python --version # Should be 3.13+
|
||||
uv sync --reinstall
|
||||
```
|
||||
|
||||
**Issue**: OpenSearch connection failed
|
||||
```bash
|
||||
# Check if OpenSearch is running
|
||||
curl -k -u admin:admin123 https://localhost:9200
|
||||
# If using Docker, ensure the container is running
|
||||
docker ps | grep opensearch
|
||||
```
|
||||
|
||||
**Issue**: CUDA/GPU not detected
|
||||
```bash
|
||||
# Check GPU availability
|
||||
python -c "import torch; print(torch.cuda.is_available())"
|
||||
# For CPU-only development, use docker-compose-cpu.yml
|
||||
```
|
||||
|
||||
#### Frontend Issues
|
||||
|
||||
**Issue**: Next.js build failures
|
||||
```bash
|
||||
# Clear cache and reinstall
|
||||
cd frontend
|
||||
rm -rf .next node_modules package-lock.json
|
||||
npm install
|
||||
npm run dev
|
||||
```
|
||||
|
||||
**Issue**: API calls failing
|
||||
- Check that backend is running on port 8000
|
||||
- Verify environment variables are set correctly
|
||||
- Check browser network tab for CORS or proxy issues
|
||||
|
||||
#### Document Processing Issues
|
||||
|
||||
**Issue**: Docling model download failures
|
||||
```bash
|
||||
# Pre-download models
|
||||
uv run docling-tools models download
|
||||
# Or clear cache and retry
|
||||
rm -rf ~/.cache/docling
|
||||
```
|
||||
|
||||
**Issue**: EasyOCR initialization errors
|
||||
```bash
|
||||
# Clear EasyOCR cache
|
||||
rm -rf ~/.EasyOCR
|
||||
# Restart the backend to reinitialize
|
||||
```
|
||||
|
||||
### Development Tips
|
||||
|
||||
1. **Hot Reloading**:
|
||||
- Backend: Use `uvicorn src.main:app --reload` for auto-restart
|
||||
- Frontend: `npm run dev` provides hot reloading
|
||||
|
||||
2. **Debugging**:
|
||||
- Add `print()` statements or use `pdb.set_trace()` in Python
|
||||
- Use browser dev tools for frontend debugging
|
||||
- Check Docker logs: `docker compose logs -f service_name`
|
||||
|
||||
3. **Database Inspection**:
|
||||
- Access OpenSearch Dashboards at http://localhost:5601
|
||||
- Use curl to query OpenSearch directly
|
||||
- Check the `documents` index for uploaded content
|
||||
|
||||
4. **Performance**:
|
||||
- GPU processing is much faster for document processing
|
||||
- Use CPU-only mode if GPU issues occur
|
||||
- Monitor memory usage with `docker stats` or `htop`
|
||||
|
||||
### Log Locations
|
||||
|
||||
- **Backend**: Console output or container logs
|
||||
- **Frontend**: Browser console and Next.js terminal
|
||||
- **OpenSearch**: Container logs (`docker compose logs opensearch`)
|
||||
- **Langflow**: Container logs (`docker compose logs langflow`)
|
||||
|
||||
## Contributing
|
||||
|
||||
### Code Style
|
||||
|
||||
- **Python**: Follow PEP 8, use `black` for formatting
|
||||
- **TypeScript**: Use ESLint configuration in `frontend/`
|
||||
- **Commits**: Use conventional commit messages
|
||||
|
||||
### Development Workflow
|
||||
|
||||
1. Create feature branch from `main`
|
||||
2. Make changes and test locally
|
||||
3. Run tests (if available)
|
||||
4. Create pull request with description
|
||||
5. Ensure all checks pass
|
||||
|
||||
### Testing
|
||||
|
||||
```bash
|
||||
# Backend tests (if available)
|
||||
cd src
|
||||
uv run pytest
|
||||
|
||||
# Frontend tests (if available)
|
||||
cd frontend
|
||||
npm test
|
||||
|
||||
# Integration tests with Docker
|
||||
docker compose -f docker-compose-dev.yml up --build
|
||||
# Test API endpoints manually or with automated tests
|
||||
```
|
||||
|
||||
## Additional Resources
|
||||
|
||||
- [OpenSearch Documentation](https://opensearch.org/docs/)
|
||||
- [Langflow Documentation](https://docs.langflow.org/)
|
||||
- [Next.js Documentation](https://nextjs.org/docs)
|
||||
- [Starlette Documentation](https://www.starlette.io/)
|
||||
- [Docling Documentation](https://ds4sd.github.io/docling/)
|
||||
|
||||
---
|
||||
|
||||
For questions or issues, please check the troubleshooting section above or create an issue in the repository.
|
||||
105
docker-compose-dev.yml
Normal file
105
docker-compose-dev.yml
Normal file
|
|
@ -0,0 +1,105 @@
|
|||
services:
|
||||
opensearch:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
container_name: os
|
||||
depends_on:
|
||||
- openrag-backend
|
||||
environment:
|
||||
- discovery.type=single-node
|
||||
- OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_PASSWORD}
|
||||
# Run security setup in background after OpenSearch starts
|
||||
command: >
|
||||
bash -c "
|
||||
# Start OpenSearch in background
|
||||
/usr/share/opensearch/opensearch-docker-entrypoint.sh opensearch &
|
||||
|
||||
# Wait a bit for OpenSearch to start, then apply security config
|
||||
sleep 10 && /usr/share/opensearch/setup-security.sh &
|
||||
|
||||
# Wait for background processes
|
||||
wait
|
||||
"
|
||||
ports:
|
||||
- "9200:9200"
|
||||
- "9600:9600"
|
||||
|
||||
dashboards:
|
||||
image: opensearchproject/opensearch-dashboards:3.0.0
|
||||
container_name: osdash
|
||||
depends_on:
|
||||
- opensearch
|
||||
environment:
|
||||
OPENSEARCH_HOSTS: '["https://opensearch:9200"]'
|
||||
OPENSEARCH_USERNAME: "admin"
|
||||
OPENSEARCH_PASSWORD: ${OPENSEARCH_PASSWORD}
|
||||
ports:
|
||||
- "5601:5601"
|
||||
|
||||
openrag-backend:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile.backend
|
||||
container_name: openrag-backend
|
||||
depends_on:
|
||||
- langflow
|
||||
environment:
|
||||
- OPENSEARCH_HOST=opensearch
|
||||
- LANGFLOW_URL=http://langflow:7860
|
||||
- LANGFLOW_PUBLIC_URL=${LANGFLOW_PUBLIC_URL}
|
||||
- LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER}
|
||||
- LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD}
|
||||
- FLOW_ID=${FLOW_ID}
|
||||
- OPENSEARCH_PORT=9200
|
||||
- OPENSEARCH_USERNAME=admin
|
||||
- OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
|
||||
- OPENAI_API_KEY=${OPENAI_API_KEY}
|
||||
- NVIDIA_DRIVER_CAPABILITIES=compute,utility
|
||||
- NVIDIA_VISIBLE_DEVICES=all
|
||||
- GOOGLE_OAUTH_CLIENT_ID=${GOOGLE_OAUTH_CLIENT_ID}
|
||||
- GOOGLE_OAUTH_CLIENT_SECRET=${GOOGLE_OAUTH_CLIENT_SECRET}
|
||||
- MICROSOFT_GRAPH_OAUTH_CLIENT_ID=${MICROSOFT_GRAPH_OAUTH_CLIENT_ID}
|
||||
- MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET=${MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET}
|
||||
- WEBHOOK_BASE_URL=${WEBHOOK_BASE_URL}
|
||||
- AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
|
||||
- AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
|
||||
volumes:
|
||||
- ./documents:/app/documents:Z
|
||||
- ./keys:/app/keys:Z
|
||||
gpus: all
|
||||
|
||||
openrag-frontend:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile.frontend
|
||||
container_name: openrag-frontend
|
||||
depends_on:
|
||||
- openrag-backend
|
||||
environment:
|
||||
- OPENRAG_BACKEND_HOST=openrag-backend
|
||||
ports:
|
||||
- "3000:3000"
|
||||
|
||||
langflow:
|
||||
volumes:
|
||||
- ./flows:/app/flows:Z
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile.langflow
|
||||
container_name: langflow
|
||||
ports:
|
||||
- "7860:7860"
|
||||
environment:
|
||||
- OPENAI_API_KEY=${OPENAI_API_KEY}
|
||||
- LANGFLOW_LOAD_FLOWS_PATH=/app/flows
|
||||
- LANGFLOW_SECRET_KEY=${LANGFLOW_SECRET_KEY}
|
||||
- JWT="dummy"
|
||||
- OPENRAG-QUERY-FILTER="{}"
|
||||
- LANGFLOW_VARIABLES_TO_GET_FROM_ENVIRONMENT=JWT,OPENRAG-QUERY-FILTER
|
||||
- LANGFLOW_LOG_LEVEL=DEBUG
|
||||
- LANGFLOW_AUTO_LOGIN=${LANGFLOW_AUTO_LOGIN}
|
||||
- LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER}
|
||||
- LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD}
|
||||
- LANGFLOW_NEW_USER_IS_ACTIVE=${LANGFLOW_NEW_USER_IS_ACTIVE}
|
||||
- LANGFLOW_ENABLE_SUPERUSER_CLI=${LANGFLOW_ENABLE_SUPERUSER_CLI}
|
||||
|
|
@ -5,6 +5,10 @@ const nextConfig: NextConfig = {
|
|||
experimental: {
|
||||
proxyTimeout: 300000, // 5 minutes
|
||||
},
|
||||
// Ignore ESLint errors during build
|
||||
eslint: {
|
||||
ignoreDuringBuilds: true,
|
||||
},
|
||||
};
|
||||
|
||||
export default nextConfig;
|
||||
|
|
|
|||
|
|
@ -29,15 +29,6 @@ interface GraphResponse {
|
|||
value: OneDriveFile[]
|
||||
}
|
||||
|
||||
declare global {
|
||||
interface Window {
|
||||
mgt?: {
|
||||
Providers: {
|
||||
globalProvider: any
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
export function OneDrivePicker({
|
||||
onFileSelected,
|
||||
|
|
@ -59,15 +50,12 @@ export function OneDrivePicker({
|
|||
|
||||
useEffect(() => {
|
||||
const loadMGT = async () => {
|
||||
if (typeof window !== 'undefined' && !window.mgt) {
|
||||
if (typeof window !== 'undefined') {
|
||||
try {
|
||||
const mgtModule = await import('@microsoft/mgt-components')
|
||||
const mgtProvider = await import('@microsoft/mgt-msal2-provider')
|
||||
await import('@microsoft/mgt-components')
|
||||
await import('@microsoft/mgt-msal2-provider')
|
||||
|
||||
// Initialize provider if needed
|
||||
if (!window.mgt?.Providers?.globalProvider && accessToken) {
|
||||
// For simplicity, we'll use direct Graph API calls instead of MGT components
|
||||
}
|
||||
// For simplicity, we'll use direct Graph API calls instead of MGT components
|
||||
} catch (error) {
|
||||
console.warn('MGT not available, falling back to direct API calls')
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue