feat: Add Single Bucket Mode for MinIO/S3
This commit is contained in:
parent
0d5589bfda
commit
9393c3c185
5 changed files with 387 additions and 140 deletions
|
|
@ -17,6 +17,8 @@ minio:
|
||||||
user: 'rag_flow'
|
user: 'rag_flow'
|
||||||
password: 'infini_rag_flow'
|
password: 'infini_rag_flow'
|
||||||
host: 'localhost:9000'
|
host: 'localhost:9000'
|
||||||
|
bucket: ''
|
||||||
|
prefix_path: ''
|
||||||
es:
|
es:
|
||||||
hosts: 'http://localhost:1200'
|
hosts: 'http://localhost:1200'
|
||||||
username: 'elastic'
|
username: 'elastic'
|
||||||
|
|
|
||||||
108
docker/.env.single-bucket-example
Normal file
108
docker/.env.single-bucket-example
Normal file
|
|
@ -0,0 +1,108 @@
|
||||||
|
# Example: Single Bucket Mode Configuration
|
||||||
|
#
|
||||||
|
# This file shows how to configure RAGFlow to use a single MinIO/S3 bucket
|
||||||
|
# with directory structure instead of creating multiple buckets.
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# MinIO/S3 Configuration for Single Bucket Mode
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
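# MinIO/S3 endpoint.
# NOTE(review): the service config template changed in this commit renders the
# host as '${MINIO_HOST:-minio}:9000', so a port written here ends up followed
# by ':9000'. Confirm, and adjust the template's host line for a non-9000 port.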
|
||||||
|
# The MinIO client in this change is always created with secure=True
# (regardless of port), so the endpoint must serve HTTPS/TLS.
|
||||||
|
export MINIO_HOST=minio.example.com:443
|
||||||
|
|
||||||
|
# Access credentials
|
||||||
|
export MINIO_USER=your-access-key
|
||||||
|
export MINIO_PASSWORD=your-secret-password-here
|
||||||
|
|
||||||
|
# Single Bucket Configuration (NEW!)
|
||||||
|
# If set, all data will be stored in this bucket instead of creating
|
||||||
|
# separate buckets for each knowledge base
|
||||||
|
export MINIO_BUCKET=ragflow-bucket
|
||||||
|
|
||||||
|
# Optional: Prefix path within the bucket (NEW!)
|
||||||
|
# If set, all files will be stored under this prefix
|
||||||
|
# Example: bucket/prefix_path/kb_id/file.pdf
|
||||||
|
export MINIO_PREFIX_PATH=ragflow
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# Alternative: Multi-Bucket Mode (Default)
|
||||||
|
# ============================================================================
|
||||||
|
#
|
||||||
|
# To use the original multi-bucket mode, simply don't set MINIO_BUCKET
|
||||||
|
# and MINIO_PREFIX_PATH:
|
||||||
|
#
|
||||||
|
# export MINIO_HOST=minio.local
|
||||||
|
# export MINIO_USER=admin
|
||||||
|
# export MINIO_PASSWORD=password
|
||||||
|
# # MINIO_BUCKET not set
|
||||||
|
# # MINIO_PREFIX_PATH not set
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# Storage Mode Selection (Environment Variable)
|
||||||
|
# ============================================================================
|
||||||
|
#
|
||||||
|
# Make sure this is set to use MinIO (default)
|
||||||
|
export STORAGE_IMPL=MINIO
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# Example Path Structures
|
||||||
|
# ============================================================================
|
||||||
|
#
|
||||||
|
# Multi-Bucket Mode (default):
|
||||||
|
# bucket: kb_12345/file.pdf
|
||||||
|
# bucket: kb_67890/file.pdf
|
||||||
|
# bucket: folder_abc/file.txt
|
||||||
|
#
|
||||||
|
# Single Bucket Mode (MINIO_BUCKET set):
|
||||||
|
# bucket: ragflow-bucket/kb_12345/file.pdf
|
||||||
|
# bucket: ragflow-bucket/kb_67890/file.pdf
|
||||||
|
# bucket: ragflow-bucket/folder_abc/file.txt
|
||||||
|
#
|
||||||
|
# Single Bucket with Prefix (both set):
|
||||||
|
# bucket: ragflow-bucket/ragflow/kb_12345/file.pdf
|
||||||
|
# bucket: ragflow-bucket/ragflow/kb_67890/file.pdf
|
||||||
|
# bucket: ragflow-bucket/ragflow/folder_abc/file.txt
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# IAM Policy for Single Bucket Mode
|
||||||
|
# ============================================================================
|
||||||
|
#
|
||||||
|
# When using single bucket mode, you only need permissions for one bucket:
|
||||||
|
#
|
||||||
|
# {
|
||||||
|
# "Version": "2012-10-17",
|
||||||
|
# "Statement": [
|
||||||
|
# {
|
||||||
|
# "Effect": "Allow",
|
||||||
|
# "Action": ["s3:*"],
|
||||||
|
# "Resource": [
|
||||||
|
# "arn:aws:s3:::ragflow-bucket",
|
||||||
|
# "arn:aws:s3:::ragflow-bucket/*"
|
||||||
|
# ]
|
||||||
|
# }
|
||||||
|
# ]
|
||||||
|
# }
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# Testing the Configuration
|
||||||
|
# ============================================================================
|
||||||
|
#
|
||||||
|
# After setting these variables, you can test with MinIO Client (mc):
|
||||||
|
#
|
||||||
|
# # Configure mc alias
|
||||||
|
# mc alias set ragflow https://minio.example.com:443 \
|
||||||
|
# your-access-key \
|
||||||
|
# your-secret-password-here
|
||||||
|
#
|
||||||
|
# # List bucket contents
|
||||||
|
# mc ls ragflow/ragflow-bucket/
|
||||||
|
#
|
||||||
|
# # If prefix is set, check the prefix
|
||||||
|
# mc ls ragflow/ragflow-bucket/ragflow/
|
||||||
|
#
|
||||||
|
# # Test write permission
|
||||||
|
# echo "test" | mc pipe ragflow/ragflow-bucket/ragflow/_test.txt
|
||||||
|
#
|
||||||
|
# # Clean up test file
|
||||||
|
# mc rm ragflow/ragflow-bucket/ragflow/_test.txt
|
||||||
|
|
@ -1,22 +1,31 @@
|
||||||
ragflow:
|
ragflow:
|
||||||
host: ${RAGFLOW_HOST:-0.0.0.0}
|
host: ${RAGFLOW_HOST:-0.0.0.0}
|
||||||
http_port: 9380
|
http_port: 9380
|
||||||
|
svr_http_port: 8080
|
||||||
admin:
|
admin:
|
||||||
host: ${RAGFLOW_HOST:-0.0.0.0}
|
host: ${RAGFLOW_HOST:-0.0.0.0}
|
||||||
http_port: 9381
|
http_port: 9381
|
||||||
mysql:
|
# mysql:
|
||||||
name: '${MYSQL_DBNAME:-rag_flow}'
|
# name: '${MYSQL_DBNAME:-rag_flow}'
|
||||||
user: '${MYSQL_USER:-root}'
|
# user: '${MYSQL_USER:-root}'
|
||||||
password: '${MYSQL_PASSWORD:-infini_rag_flow}'
|
# password: '${MYSQL_PASSWORD:-infini_rag_flow}'
|
||||||
host: '${MYSQL_HOST:-mysql}'
|
# host: '${MYSQL_HOST:-mysql}'
|
||||||
port: 3306
|
# port: 3306
|
||||||
max_connections: 900
|
# max_connections: 900
|
||||||
stale_timeout: 300
|
# stale_timeout: 300
|
||||||
max_allowed_packet: ${MYSQL_MAX_PACKET:-1073741824}
|
# max_allowed_packet: ${MYSQL_MAX_PACKET:-1073741824}
|
||||||
minio:
|
minio:
|
||||||
user: '${MINIO_USER:-rag_flow}'
|
user: '${MINIO_USER:-rag_flow}'
|
||||||
password: '${MINIO_PASSWORD:-infini_rag_flow}'
|
password: '${MINIO_PASSWORD:-infini_rag_flow}'
|
||||||
host: '${MINIO_HOST:-minio}:9000'
|
host: '${MINIO_HOST:-minio}:9000'
|
||||||
|
bucket: '${MINIO_BUCKET:-}'
|
||||||
|
prefix_path: '${MINIO_PREFIX_PATH:-}'
|
||||||
|
# s3:
|
||||||
|
# access_key: '${MINIO_USER}'
|
||||||
|
# secret_key: '${MINIO_PASSWORD}'
|
||||||
|
# endpoint_url: 'https://${MINIO_HOST}'
|
||||||
|
# bucket: 'ragflow-bucket'
|
||||||
|
# region: 'us-east-1'
|
||||||
es:
|
es:
|
||||||
hosts: 'http://${ES_HOST:-es01}:9200'
|
hosts: 'http://${ES_HOST:-es01}:9200'
|
||||||
username: '${ES_USER:-elastic}'
|
username: '${ES_USER:-elastic}'
|
||||||
|
|
@ -28,40 +37,19 @@ os:
|
||||||
infinity:
|
infinity:
|
||||||
uri: '${INFINITY_HOST:-infinity}:23817'
|
uri: '${INFINITY_HOST:-infinity}:23817'
|
||||||
db_name: 'default_db'
|
db_name: 'default_db'
|
||||||
oceanbase:
|
|
||||||
scheme: 'oceanbase' # set 'mysql' to create connection using mysql config
|
|
||||||
config:
|
|
||||||
db_name: '${OCEANBASE_DOC_DBNAME:-test}'
|
|
||||||
user: '${OCEANBASE_USER:-root@ragflow}'
|
|
||||||
password: '${OCEANBASE_PASSWORD:-infini_rag_flow}'
|
|
||||||
host: '${OCEANBASE_HOST:-oceanbase}'
|
|
||||||
port: ${OCEANBASE_PORT:-2881}
|
|
||||||
redis:
|
redis:
|
||||||
db: 1
|
db: 1
|
||||||
password: '${REDIS_PASSWORD:-infini_rag_flow}'
|
password: '${REDIS_PASSWORD:-infini_rag_flow}'
|
||||||
host: '${REDIS_HOST:-redis}:6379'
|
host: '${REDIS_HOST:-redis}:6379'
|
||||||
user_default_llm:
|
|
||||||
default_models:
|
postgres:
|
||||||
embedding_model:
|
name: '${POSTGRES_DBNAME:-rag_flow}'
|
||||||
api_key: 'xxx'
|
user: '${POSTGRES_USER:-rag_flow}'
|
||||||
base_url: 'http://${TEI_HOST}:80'
|
password: '${POSTGRES_PASSWORD:-infini_rag_flow}'
|
||||||
# postgres:
|
host: '${POSTGRES_HOST:-postgres}'
|
||||||
# name: '${POSTGRES_DBNAME:-rag_flow}'
|
port: 5432
|
||||||
# user: '${POSTGRES_USER:-rag_flow}'
|
max_connections: 100
|
||||||
# password: '${POSTGRES_PASSWORD:-infini_rag_flow}'
|
stale_timeout: 30
|
||||||
# host: '${POSTGRES_HOST:-postgres}'
|
|
||||||
# port: 5432
|
|
||||||
# max_connections: 100
|
|
||||||
# stale_timeout: 30
|
|
||||||
# s3:
|
|
||||||
# access_key: 'access_key'
|
|
||||||
# secret_key: 'secret_key'
|
|
||||||
# region: 'region'
|
|
||||||
# endpoint_url: 'endpoint_url'
|
|
||||||
# bucket: 'bucket'
|
|
||||||
# prefix_path: 'prefix_path'
|
|
||||||
# signature_version: 'v4'
|
|
||||||
# addressing_style: 'path'
|
|
||||||
# oss:
|
# oss:
|
||||||
# access_key: '${ACCESS_KEY}'
|
# access_key: '${ACCESS_KEY}'
|
||||||
# secret_key: '${SECRET_KEY}'
|
# secret_key: '${SECRET_KEY}'
|
||||||
|
|
@ -80,50 +68,27 @@ user_default_llm:
|
||||||
# secret: 'secret'
|
# secret: 'secret'
|
||||||
# tenant_id: 'tenant_id'
|
# tenant_id: 'tenant_id'
|
||||||
# container_name: 'container_name'
|
# container_name: 'container_name'
|
||||||
# The OSS object storage uses the MySQL configuration above by default. If you need to switch to another object storage service, please uncomment and configure the following parameters.
|
|
||||||
# opendal:
|
|
||||||
# scheme: 'mysql' # Storage type, such as s3, oss, azure, etc.
|
|
||||||
# config:
|
|
||||||
# oss_table: 'opendal_storage'
|
|
||||||
# user_default_llm:
|
# user_default_llm:
|
||||||
# factory: 'BAAI'
|
# factory: 'Tongyi-Qianwen'
|
||||||
# api_key: 'backup'
|
# api_key: 'sk-xxxxxxxxxxxxx'
|
||||||
# base_url: 'backup_base_url'
|
# base_url: ''
|
||||||
# default_models:
|
# default_models:
|
||||||
# chat_model:
|
# chat_model: 'qwen-plus'
|
||||||
# name: 'qwen2.5-7b-instruct'
|
# embedding_model: 'BAAI/bge-large-zh-v1.5@BAAI'
|
||||||
# factory: 'xxxx'
|
# rerank_model: ''
|
||||||
# api_key: 'xxxx'
|
# asr_model: ''
|
||||||
# base_url: 'https://api.xx.com'
|
|
||||||
# embedding_model:
|
|
||||||
# name: 'bge-m3'
|
|
||||||
# rerank_model: 'bge-reranker-v2'
|
|
||||||
# asr_model:
|
|
||||||
# model: 'whisper-large-v3' # alias of name
|
|
||||||
# image2text_model: ''
|
# image2text_model: ''
|
||||||
# oauth:
|
# oauth:
|
||||||
# oauth2:
|
|
||||||
# display_name: "OAuth2"
|
|
||||||
# client_id: "your_client_id"
|
|
||||||
# client_secret: "your_client_secret"
|
|
||||||
# authorization_url: "https://your-oauth-provider.com/oauth/authorize"
|
|
||||||
# token_url: "https://your-oauth-provider.com/oauth/token"
|
|
||||||
# userinfo_url: "https://your-oauth-provider.com/oauth/userinfo"
|
|
||||||
# redirect_uri: "https://your-app.com/v1/user/oauth/callback/oauth2"
|
|
||||||
# oidc:
|
|
||||||
# display_name: "OIDC"
|
|
||||||
# client_id: "your_client_id"
|
|
||||||
# client_secret: "your_client_secret"
|
|
||||||
# issuer: "https://your-oauth-provider.com/oidc"
|
|
||||||
# scope: "openid email profile"
|
|
||||||
# redirect_uri: "https://your-app.com/v1/user/oauth/callback/oidc"
|
|
||||||
# github:
|
# github:
|
||||||
# type: "github"
|
# client_id: xxxxxxxxxxxxxxxxxxxxxxxxx
|
||||||
# icon: "github"
|
# secret_key: xxxxxxxxxxxxxxxxxxxxxxxxxxxx
|
||||||
# display_name: "Github"
|
# url: https://github.com/login/oauth/access_token
|
||||||
# client_id: "your_client_id"
|
# feishu:
|
||||||
# client_secret: "your_client_secret"
|
# app_id: cli_xxxxxxxxxxxxxxxxxxx
|
||||||
# redirect_uri: "https://your-app.com/v1/user/oauth/callback/github"
|
# app_secret: xxxxxxxxxxxxxxxxxxxxxxxxxxxx
|
||||||
|
# app_access_token_url: https://open.feishu.cn/open-apis/auth/v3/app_access_token/internal
|
||||||
|
# user_access_token_url: https://open.feishu.cn/open-apis/authen/v1/oidc/access_token
|
||||||
|
# grant_type: 'authorization_code'
|
||||||
# authentication:
|
# authentication:
|
||||||
# client:
|
# client:
|
||||||
# switch: false
|
# switch: false
|
||||||
|
|
@ -135,18 +100,3 @@ user_default_llm:
|
||||||
# switch: false
|
# switch: false
|
||||||
# component: false
|
# component: false
|
||||||
# dataset: false
|
# dataset: false
|
||||||
# smtp:
|
|
||||||
# mail_server: ""
|
|
||||||
# mail_port: 465
|
|
||||||
# mail_use_ssl: true
|
|
||||||
# mail_use_tls: false
|
|
||||||
# mail_username: ""
|
|
||||||
# mail_password: ""
|
|
||||||
# mail_default_sender:
|
|
||||||
# - "RAGFlow" # display name
|
|
||||||
# - "" # sender email address
|
|
||||||
# mail_frontend_url: "https://your-frontend.example.com"
|
|
||||||
# tcadp_config:
|
|
||||||
# secret_id: '${TENCENT_SECRET_ID}'
|
|
||||||
# secret_key: '${TENCENT_SECRET_KEY}'
|
|
||||||
# region: '${TENCENT_REGION}'
|
|
||||||
|
|
|
||||||
162
docs/single-bucket-mode.md
Normal file
162
docs/single-bucket-mode.md
Normal file
|
|
@ -0,0 +1,162 @@
|
||||||
|
# Single Bucket Mode for MinIO/S3
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
By default, RAGFlow creates one bucket per Knowledge Base (dataset) and one bucket per user folder. This can be problematic when:
|
||||||
|
|
||||||
|
- Your cloud provider charges per bucket
|
||||||
|
- Your IAM policy restricts bucket creation
|
||||||
|
- You want all data organized in a single bucket with directory structure
|
||||||
|
|
||||||
|
The **Single Bucket Mode** allows you to configure RAGFlow to use a single bucket with a directory structure instead of multiple buckets.
|
||||||
|
|
||||||
|
## How It Works
|
||||||
|
|
||||||
|
### Default Mode (Multiple Buckets)
|
||||||
|
|
||||||
|
```
|
||||||
|
bucket: kb_12345/
|
||||||
|
└── document_1.pdf
|
||||||
|
bucket: kb_67890/
|
||||||
|
└── document_2.pdf
|
||||||
|
bucket: folder_abc/
|
||||||
|
└── file_3.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
### Single Bucket Mode (with prefix_path)
|
||||||
|
|
||||||
|
```
|
||||||
|
bucket: ragflow-bucket/
|
||||||
|
└── ragflow/
|
||||||
|
├── kb_12345/
|
||||||
|
│ └── document_1.pdf
|
||||||
|
├── kb_67890/
|
||||||
|
│ └── document_2.pdf
|
||||||
|
└── folder_abc/
|
||||||
|
└── file_3.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
### MinIO Configuration
|
||||||
|
|
||||||
|
Edit your `service_conf.yaml` or set environment variables:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
minio:
|
||||||
|
user: "your-access-key"
|
||||||
|
password: "your-secret-key"
|
||||||
|
host: "minio.example.com:443"
|
||||||
|
bucket: "ragflow-bucket" # Default bucket name
|
||||||
|
prefix_path: "ragflow" # Optional prefix path
|
||||||
|
```
|
||||||
|
|
||||||
|
Or using environment variables:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export MINIO_USER=your-access-key
|
||||||
|
export MINIO_PASSWORD=your-secret-key
|
||||||
|
export MINIO_HOST=minio.example.com:443
|
||||||
|
export MINIO_BUCKET=ragflow-bucket
|
||||||
|
export MINIO_PREFIX_PATH=ragflow
|
||||||
|
```
|
||||||
|
|
||||||
|
### S3 Configuration (already supported)
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
s3:
|
||||||
|
access_key: "your-access-key"
|
||||||
|
secret_key: "your-secret-key"
|
||||||
|
endpoint_url: "https://s3.amazonaws.com"
|
||||||
|
bucket: "my-ragflow-bucket"
|
||||||
|
prefix_path: "production"
|
||||||
|
region: "us-east-1"
|
||||||
|
```
|
||||||
|
|
||||||
|
## IAM Policy Example
|
||||||
|
|
||||||
|
When using single bucket mode, you only need permissions for one bucket:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"Version": "2012-10-17",
|
||||||
|
"Statement": [
|
||||||
|
{
|
||||||
|
"Effect": "Allow",
|
||||||
|
"Action": ["s3:*"],
|
||||||
|
"Resource": [
|
||||||
|
"arn:aws:s3:::ragflow-bucket",
|
||||||
|
"arn:aws:s3:::ragflow-bucket/*"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Migration from Multi-Bucket to Single Bucket
|
||||||
|
|
||||||
|
If you're migrating from multi-bucket mode to single-bucket mode:
|
||||||
|
|
||||||
|
1. **Set environment variables** for the new configuration
|
||||||
|
2. **Restart RAGFlow** services
|
||||||
|
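3. **Migrate existing data** (required if files uploaded in multi-bucket mode must remain accessible, because object paths differ between the two modes; skip only for a fresh deployment):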
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Example using mc (MinIO Client)
|
||||||
|
mc alias set old-minio http://old-minio:9000 ACCESS_KEY SECRET_KEY
|
||||||
|
mc alias set new-minio https://new-minio:443 ACCESS_KEY SECRET_KEY
|
||||||
|
|
||||||
|
# List all knowledge base buckets
|
||||||
|
mc ls old-minio/ | grep kb_ | while read -r line; do
|
||||||
|
bucket=$(echo $line | awk '{print $5}')
|
||||||
|
# Copy each bucket to the new structure
|
||||||
|
mc cp --recursive old-minio/$bucket/ new-minio/ragflow-bucket/ragflow/$bucket/
|
||||||
|
done
|
||||||
|
```
|
||||||
|
|
||||||
|
## Toggle Between Modes
|
||||||
|
|
||||||
|
### Enable Single Bucket Mode
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
minio:
|
||||||
|
bucket: "my-single-bucket"
|
||||||
|
prefix_path: "ragflow"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Disable (Use Multi-Bucket Mode)
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
minio:
|
||||||
|
# Leave bucket and prefix_path empty or commented out
|
||||||
|
# bucket: ''
|
||||||
|
# prefix_path: ''
|
||||||
|
```
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
### Issue: Access Denied errors
|
||||||
|
|
||||||
|
**Solution**: Ensure your IAM policy grants access to the bucket specified in the configuration.
|
||||||
|
|
||||||
|
### Issue: Files not found after switching modes
|
||||||
|
|
||||||
|
**Solution**: The path structure changes between modes. You'll need to migrate existing data.
|
||||||
|
|
||||||
|
### Issue: Connection fails with HTTPS
|
||||||
|
|
||||||
|
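**Solution**: The MinIO client is created with `secure=True` for every endpoint, not only port 443, so the configured host must serve HTTPS/TLS. A plain-HTTP endpoint (for example a local `localhost:9000` MinIO) will fail to connect until it is put behind TLS or the `secure` flag is changed in the connector.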
|
||||||
|
|
||||||
|
## Storage Backends Supported
|
||||||
|
|
||||||
|
- ✅ **MinIO** - Full support with single bucket mode
|
||||||
|
- ✅ **AWS S3** - Full support with single bucket mode
|
||||||
|
- ✅ **Alibaba OSS** - Full support with single bucket mode
|
||||||
|
- ✅ **Azure Blob** - Uses container-based structure (different paradigm)
|
||||||
|
- ⚠️ **OpenDAL** - Depends on underlying storage backend
|
||||||
|
|
||||||
|
## Performance Considerations
|
||||||
|
|
||||||
|
- **Single bucket mode** may have slightly better performance for bucket listing operations
|
||||||
|
- **Multi-bucket mode** provides better isolation and organization for large deployments
|
||||||
|
- Choose based on your specific requirements and infrastructure constraints
|
||||||
|
|
@ -28,8 +28,49 @@ from common import settings
|
||||||
class RAGFlowMinio:
|
class RAGFlowMinio:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.conn = None
|
self.conn = None
|
||||||
|
self.bucket = settings.MINIO.get('bucket', None)
|
||||||
|
self.prefix_path = settings.MINIO.get('prefix_path', None)
|
||||||
self.__open__()
|
self.__open__()
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def use_default_bucket(method):
|
||||||
|
def wrapper(self, bucket, *args, **kwargs):
|
||||||
|
# If there is a default bucket, use the default bucket
|
||||||
|
# but preserve the original bucket identifier so it can be
|
||||||
|
# used as a path prefix inside the physical/default bucket.
|
||||||
|
original_bucket = bucket
|
||||||
|
actual_bucket = self.bucket if self.bucket else bucket
|
||||||
|
if self.bucket:
|
||||||
|
# pass original identifier forward for use by other decorators
|
||||||
|
kwargs['_orig_bucket'] = original_bucket
|
||||||
|
return method(self, actual_bucket, *args, **kwargs)
|
||||||
|
return wrapper
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def use_prefix_path(method):
|
||||||
|
def wrapper(self, bucket, fnm, *args, **kwargs):
|
||||||
|
# If a default MINIO bucket is configured, the use_default_bucket
|
||||||
|
# decorator will have replaced the `bucket` arg with the physical
|
||||||
|
# bucket name and forwarded the original identifier as `_orig_bucket`.
|
||||||
|
# Prefer that original identifier when constructing the key path so
|
||||||
|
# objects are stored under <physical-bucket>/<identifier>/...
|
||||||
|
orig_bucket = kwargs.pop('_orig_bucket', None)
|
||||||
|
|
||||||
|
if self.prefix_path:
|
||||||
|
# If a prefix_path is configured, include it and then the identifier
|
||||||
|
if orig_bucket:
|
||||||
|
fnm = f"{self.prefix_path}/{orig_bucket}/{fnm}"
|
||||||
|
else:
|
||||||
|
fnm = f"{self.prefix_path}/{fnm}"
|
||||||
|
else:
|
||||||
|
# No prefix_path configured. If orig_bucket exists and the
|
||||||
|
# physical bucket equals configured default, use orig_bucket as a path.
|
||||||
|
if orig_bucket and bucket == self.bucket:
|
||||||
|
fnm = f"{orig_bucket}/{fnm}"
|
||||||
|
|
||||||
|
return method(self, bucket, fnm, *args, **kwargs)
|
||||||
|
return wrapper
|
||||||
|
|
||||||
def __open__(self):
|
def __open__(self):
|
||||||
try:
|
try:
|
||||||
if self.conn:
|
if self.conn:
|
||||||
|
|
@ -41,7 +82,7 @@ class RAGFlowMinio:
|
||||||
self.conn = Minio(settings.MINIO["host"],
|
self.conn = Minio(settings.MINIO["host"],
|
||||||
access_key=settings.MINIO["user"],
|
access_key=settings.MINIO["user"],
|
||||||
secret_key=settings.MINIO["password"],
|
secret_key=settings.MINIO["password"],
|
||||||
secure=False
|
secure=True
|
||||||
)
|
)
|
||||||
except Exception:
|
except Exception:
|
||||||
logging.exception(
|
logging.exception(
|
||||||
|
|
@ -52,20 +93,28 @@ class RAGFlowMinio:
|
||||||
self.conn = None
|
self.conn = None
|
||||||
|
|
||||||
def health(self):
|
def health(self):
|
||||||
bucket, fnm, binary = "txtxtxtxt1", "txtxtxtxt1", b"_t@@@1"
|
bucket = self.bucket if self.bucket else "ragflow-bucket"
|
||||||
if not self.conn.bucket_exists(bucket):
|
fnm = "_health_check"
|
||||||
self.conn.make_bucket(bucket)
|
if self.prefix_path:
|
||||||
|
fnm = f"{self.prefix_path}/{fnm}"
|
||||||
|
binary = b"_t@@@1"
|
||||||
|
# Don't try to create bucket - it should already exist
|
||||||
|
# if not self.conn.bucket_exists(bucket):
|
||||||
|
# self.conn.make_bucket(bucket)
|
||||||
r = self.conn.put_object(bucket, fnm,
|
r = self.conn.put_object(bucket, fnm,
|
||||||
BytesIO(binary),
|
BytesIO(binary),
|
||||||
len(binary)
|
len(binary)
|
||||||
)
|
)
|
||||||
return r
|
return r
|
||||||
|
|
||||||
def put(self, bucket, fnm, binary, tenant_id=None):
|
@use_default_bucket
|
||||||
|
@use_prefix_path
|
||||||
|
def put(self, bucket, fnm, binary):
|
||||||
for _ in range(3):
|
for _ in range(3):
|
||||||
try:
|
try:
|
||||||
if not self.conn.bucket_exists(bucket):
|
# Note: bucket must already exist - we don't have permission to create buckets
|
||||||
self.conn.make_bucket(bucket)
|
# if not self.conn.bucket_exists(bucket):
|
||||||
|
# self.conn.make_bucket(bucket)
|
||||||
|
|
||||||
r = self.conn.put_object(bucket, fnm,
|
r = self.conn.put_object(bucket, fnm,
|
||||||
BytesIO(binary),
|
BytesIO(binary),
|
||||||
|
|
@ -77,13 +126,17 @@ class RAGFlowMinio:
|
||||||
self.__open__()
|
self.__open__()
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
|
|
||||||
def rm(self, bucket, fnm, tenant_id=None):
|
@use_default_bucket
|
||||||
|
@use_prefix_path
|
||||||
|
def rm(self, bucket, fnm):
|
||||||
try:
|
try:
|
||||||
self.conn.remove_object(bucket, fnm)
|
self.conn.remove_object(bucket, fnm)
|
||||||
except Exception:
|
except Exception:
|
||||||
logging.exception(f"Fail to remove {bucket}/{fnm}:")
|
logging.exception(f"Fail to remove {bucket}/{fnm}:")
|
||||||
|
|
||||||
def get(self, bucket, filename, tenant_id=None):
|
@use_default_bucket
|
||||||
|
@use_prefix_path
|
||||||
|
def get(self, bucket, filename):
|
||||||
for _ in range(1):
|
for _ in range(1):
|
||||||
try:
|
try:
|
||||||
r = self.conn.get_object(bucket, filename)
|
r = self.conn.get_object(bucket, filename)
|
||||||
|
|
@ -92,9 +145,11 @@ class RAGFlowMinio:
|
||||||
logging.exception(f"Fail to get {bucket}/{filename}")
|
logging.exception(f"Fail to get {bucket}/{filename}")
|
||||||
self.__open__()
|
self.__open__()
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
return None
|
return
|
||||||
|
|
||||||
def obj_exist(self, bucket, filename, tenant_id=None):
|
@use_default_bucket
|
||||||
|
@use_prefix_path
|
||||||
|
def obj_exist(self, bucket, filename):
|
||||||
try:
|
try:
|
||||||
if not self.conn.bucket_exists(bucket):
|
if not self.conn.bucket_exists(bucket):
|
||||||
return False
|
return False
|
||||||
|
|
@ -109,6 +164,7 @@ class RAGFlowMinio:
|
||||||
logging.exception(f"obj_exist {bucket}/{filename} got exception")
|
logging.exception(f"obj_exist {bucket}/{filename} got exception")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
@use_default_bucket
|
||||||
def bucket_exists(self, bucket):
|
def bucket_exists(self, bucket):
|
||||||
try:
|
try:
|
||||||
if not self.conn.bucket_exists(bucket):
|
if not self.conn.bucket_exists(bucket):
|
||||||
|
|
@ -122,7 +178,9 @@ class RAGFlowMinio:
|
||||||
logging.exception(f"bucket_exist {bucket} got exception")
|
logging.exception(f"bucket_exist {bucket} got exception")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def get_presigned_url(self, bucket, fnm, expires, tenant_id=None):
|
@use_default_bucket
|
||||||
|
@use_prefix_path
|
||||||
|
def get_presigned_url(self, bucket, fnm, expires):
|
||||||
for _ in range(10):
|
for _ in range(10):
|
||||||
try:
|
try:
|
||||||
return self.conn.get_presigned_url("GET", bucket, fnm, expires)
|
return self.conn.get_presigned_url("GET", bucket, fnm, expires)
|
||||||
|
|
@ -130,8 +188,9 @@ class RAGFlowMinio:
|
||||||
logging.exception(f"Fail to get_presigned {bucket}/{fnm}:")
|
logging.exception(f"Fail to get_presigned {bucket}/{fnm}:")
|
||||||
self.__open__()
|
self.__open__()
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
return None
|
return
|
||||||
|
|
||||||
|
@use_default_bucket
|
||||||
def remove_bucket(self, bucket):
|
def remove_bucket(self, bucket):
|
||||||
try:
|
try:
|
||||||
if self.conn.bucket_exists(bucket):
|
if self.conn.bucket_exists(bucket):
|
||||||
|
|
@ -141,37 +200,3 @@ class RAGFlowMinio:
|
||||||
self.conn.remove_bucket(bucket)
|
self.conn.remove_bucket(bucket)
|
||||||
except Exception:
|
except Exception:
|
||||||
logging.exception(f"Fail to remove bucket {bucket}")
|
logging.exception(f"Fail to remove bucket {bucket}")
|
||||||
|
|
||||||
def copy(self, src_bucket, src_path, dest_bucket, dest_path):
|
|
||||||
try:
|
|
||||||
if not self.conn.bucket_exists(dest_bucket):
|
|
||||||
self.conn.make_bucket(dest_bucket)
|
|
||||||
|
|
||||||
try:
|
|
||||||
self.conn.stat_object(src_bucket, src_path)
|
|
||||||
except Exception as e:
|
|
||||||
logging.exception(f"Source object not found: {src_bucket}/{src_path}, {e}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
self.conn.copy_object(
|
|
||||||
dest_bucket,
|
|
||||||
dest_path,
|
|
||||||
CopySource(src_bucket, src_path),
|
|
||||||
)
|
|
||||||
return True
|
|
||||||
|
|
||||||
except Exception:
|
|
||||||
logging.exception(f"Fail to copy {src_bucket}/{src_path} -> {dest_bucket}/{dest_path}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
def move(self, src_bucket, src_path, dest_bucket, dest_path):
|
|
||||||
try:
|
|
||||||
if self.copy(src_bucket, src_path, dest_bucket, dest_path):
|
|
||||||
self.rm(src_bucket, src_path)
|
|
||||||
return True
|
|
||||||
else:
|
|
||||||
logging.error(f"Copy failed, move aborted: {src_bucket}/{src_path}")
|
|
||||||
return False
|
|
||||||
except Exception:
|
|
||||||
logging.exception(f"Fail to move {src_bucket}/{src_path} -> {dest_bucket}/{dest_path}")
|
|
||||||
return False
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue