diff --git a/conf/service_conf.yaml b/conf/service_conf.yaml index b57f58206..fd0550d80 100644 --- a/conf/service_conf.yaml +++ b/conf/service_conf.yaml @@ -9,6 +9,7 @@ mysql: port: 5455 max_connections: 900 stale_timeout: 300 + max_allowed_packet: 1073741824 minio: user: 'rag_flow' password: 'infini_rag_flow' @@ -58,6 +59,11 @@ redis: # secret: 'secret' # tenant_id: 'tenant_id' # container_name: 'container_name' +# The OSS object storage uses the MySQL configuration above by default. If you need to switch to another object storage service, please uncomment and configure the following parameters. +# opendal: +# schema: 'mysql' # Storage type, such as s3, oss, azure, etc. +# config: +# oss_table: 'your_table_name' # user_default_llm: # factory: 'Tongyi-Qianwen' # api_key: 'sk-xxxxxxxxxxxxx' diff --git a/pyproject.toml b/pyproject.toml index 72aa7ba76..34ce23f20 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -62,6 +62,7 @@ dependencies = [ "opencv-python==4.10.0.84", "opencv-python-headless==4.10.0.84", "openpyxl>=3.1.0,<4.0.0", + "opendal>=0.45.0,<0.46.0", "ormsgpack==1.5.0", "pandas>=2.2.0,<3.0.0", "pdfplumber==0.10.4", @@ -71,6 +72,7 @@ dependencies = [ "psycopg2-binary==2.9.9", "pyclipper==1.3.0.post5", "pycryptodomex==3.20.0", + "pymysql>=1.1.1,<2.0.0", "pypdf>=5.0.0,<6.0.0", "python-dotenv==1.0.1", "python-dateutil==2.8.2", @@ -84,6 +86,7 @@ dependencies = [ "replicate==0.31.0", "roman-numbers==1.0.2", "ruamel-base==1.0.0", + "ruamel-yaml>=0.18.6,<0.19.0", "scholarly==1.7.11", "scikit-learn==1.5.0", "selenium==4.22.0", diff --git a/rag/utils/opendal_conn.py b/rag/utils/opendal_conn.py new file mode 100644 index 000000000..9061bde16 --- /dev/null +++ b/rag/utils/opendal_conn.py @@ -0,0 +1,115 @@ +import opendal +import logging +import pymysql +import yaml + +from rag.utils import singleton + +SERVICE_CONF_PATH = "conf/service_conf.yaml" + +CREATE_TABLE_SQL = """ +CREATE TABLE IF NOT EXISTS `{}` ( + `key` VARCHAR(255) PRIMARY KEY, + `value` LONGBLOB, + `created_at` TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + `updated_at` TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP +); +""" +SET_MAX_ALLOWED_PACKET_SQL = """ +SET GLOBAL max_allowed_packet={} +""" + + +def get_opendal_config_from_yaml(yaml_path=SERVICE_CONF_PATH): + try: + with open(yaml_path, 'r') as f: + config = yaml.safe_load(f) + + opendal_config = config.get('opendal', {}) + kwargs = {} + if opendal_config.get("schema") == 'mysql': + mysql_config = config.get('mysql', {}) + kwargs = { + "schema": "mysql", + "host": mysql_config.get("host", "127.0.0.1"), + "port": str(mysql_config.get("port", 3306)), + "user": mysql_config.get("user", "root"), + "password": mysql_config.get("password", ""), + "database": mysql_config.get("name", "test_open_dal"), + "table": opendal_config.get("config").get("table", "opendal_storage") + } + kwargs["connection_string"] = f"mysql://{kwargs['user']}:{kwargs['password']}@{kwargs['host']}:{kwargs['port']}/{kwargs['database']}" + else: + schema = opendal_config.get("schema") + config_data = opendal_config.get("config", {}) + kwargs = {"schema": schema, **config_data} + logging.info("Loaded OpenDAL configuration from yaml: %s", kwargs) + return kwargs + except Exception as e: + logging.error("Failed to load OpenDAL configuration from yaml: %s", str(e)) + raise + + +@singleton +class OpenDALStorage: + def __init__(self): + self._kwargs = get_opendal_config_from_yaml() + self._schema = self._kwargs.get('schema', 'mysql') + if self._schema == 'mysql': + self.init_db_config() + self.init_opendal_mysql_table() + self._operator = opendal.Operator(self._schema, **self._kwargs) + + logging.info("OpenDALStorage initialized successfully") + + def put(self, bucket, fnm, binary): + self._operator.write(f"{bucket}/{fnm}", binary) + + def get(self, bucket, fnm): + return self._operator.read(f"{bucket}/{fnm}") + + def rm(self, bucket, fnm): + self._operator.delete(f"{bucket}/{fnm}") + self._operator.__init__() + + def scan(self, bucket, fnm): + return self._operator.scan(f"{bucket}/{fnm}") + + def obj_exist(self, bucket, fnm): + return self._operator.exists(f"{bucket}/{fnm}") + + + def init_db_config(self): + try: + conn = pymysql.connect( + host=self._kwargs['host'], + port=int(self._kwargs['port']), + user=self._kwargs['user'], + password=self._kwargs['password'], + database=self._kwargs['database'] + ) + cursor = conn.cursor() + max_packet = self._kwargs.get('max_allowed_packet', 4194304) # Default to 4MB if not specified + cursor.execute(SET_MAX_ALLOWED_PACKET_SQL.format(max_packet)) + conn.commit() + cursor.close() + conn.close() + logging.info(f"Database configuration initialized with max_allowed_packet={max_packet}") + except Exception as e: + logging.error(f"Failed to initialize database configuration: {str(e)}") + raise + + def init_opendal_mysql_table(self): + conn = pymysql.connect( + host=self._kwargs['host'], + port=int(self._kwargs['port']), + user=self._kwargs['user'], + password=self._kwargs['password'], + database=self._kwargs['database'] + ) + cursor = conn.cursor() + cursor.execute(CREATE_TABLE_SQL.format(self._kwargs['table'])) + conn.commit() + cursor.close() + conn.close() + logging.info(f"Table `{self._kwargs['table']}` initialized.") diff --git a/rag/utils/storage_factory.py b/rag/utils/storage_factory.py index 63587b3b0..4ac091f85 100644 --- a/rag/utils/storage_factory.py +++ b/rag/utils/storage_factory.py @@ -20,6 +20,7 @@ from enum import Enum from rag.utils.azure_sas_conn import RAGFlowAzureSasBlob from rag.utils.azure_spn_conn import RAGFlowAzureSpnBlob from rag.utils.minio_conn import RAGFlowMinio +from rag.utils.opendal_conn import OpenDALStorage from rag.utils.s3_conn import RAGFlowS3 from rag.utils.oss_conn import RAGFlowOSS @@ -30,6 +31,7 @@ class Storage(Enum): AZURE_SAS = 3 AWS_S3 = 4 OSS = 5 + OPENDAL = 6 class StorageFactory: @@ -39,6 +41,7 @@ class StorageFactory: Storage.AZURE_SAS: RAGFlowAzureSasBlob, Storage.AWS_S3: RAGFlowS3, Storage.OSS: RAGFlowOSS, + Storage.OPENDAL: OpenDALStorage } @classmethod diff --git a/uv.lock b/uv.lock index 0246b27d2..81ef16849 100644 --- a/uv.lock +++ b/uv.lock @@ -3662,6 +3662,30 @@ wheels = [ { url = "https://mirrors.aliyun.com/pypi/packages/26/d0/22f68eb23eea053a31655960f133c0be9726c6a881547e6e9e7e2a946c4f/opencv_python_headless-4.10.0.84-cp37-abi3-win_amd64.whl", hash = "sha256:afcf28bd1209dd58810d33defb622b325d3cbe49dcd7a43a902982c33e5fad05" }, ] +[[package]] +name = "opendal" +version = "0.45.20" +source = { registry = "https://mirrors.aliyun.com/pypi/simple" } +sdist = { url = "https://mirrors.aliyun.com/pypi/packages/2f/3f/927dfe1349ae58b9238b8eafba747af648d660a9425f486dda01a10f0b78/opendal-0.45.20.tar.gz", hash = "sha256:9f6f90d9e9f9d6e9e5a34aa7729169ef34d2f1869ad1e01ddc39b1c0ce0c9405" } +wheels = [ + { url = "https://mirrors.aliyun.com/pypi/packages/f7/d9/b74575762bd9178b0498125f270268e0fb122ee991188e053048da7f002c/opendal-0.45.20-cp310-cp310-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:d6069cef67f501eda221da63320bd1291aee967f5f8678ccee9e6e566ab37c78" }, + { url = "https://mirrors.aliyun.com/pypi/packages/56/f6/0af7d8a4afe5bae6222c4715f0563fa8c257f0525802da47120e28314353/opendal-0.45.20-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c52c4bf9433a3fa17d1f7b18f386a8f601c4b41e3fae9a839d0a861867d6086a" }, + { url = "https://mirrors.aliyun.com/pypi/packages/96/16/cf0cfc0838c7837f5642824738ad57f84cee658b4cfdd2b25fdfb52ca8a7/opendal-0.45.20-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:088bc9b20c5f07bbb19a9ff45c32dd3d42cf2d0b4ef40a2319ca27cdc635bf0f" }, + { url = "https://mirrors.aliyun.com/pypi/packages/b0/76/e903436877895fcf948e36aa728b4b56a3a600c4fd3297d8e4bc38a843be/opendal-0.45.20-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:55efb4388fa03f309de497bf9b9854377fc4045da069c72c9d2df21d24c686cb" }, + { url = "https://mirrors.aliyun.com/pypi/packages/34/10/7863a90a592ed6bfb2ddde104db23a00586004e2197f86a255ad9f8a9401/opendal-0.45.20-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:49c966cda40dc6b7b100ea6150d2f29e01ed7db694c5a5168c5fc451872ec77c" }, + { url = "https://mirrors.aliyun.com/pypi/packages/b4/a3/b77497101e320bcaebb7e99c43d61ca1886795c6a83001d4426cdbc3683d/opendal-0.45.20-cp310-cp310-musllinux_1_1_armv7l.whl", hash = "sha256:e81af55e1d8c145119dfa4c9cacd1fd60c1c1fba2207ec5064cb6baae8c3c86b" }, + { url = "https://mirrors.aliyun.com/pypi/packages/fc/36/21495e4a405d47ece52df98c323ba9467f43e0641e04819ab5732bf0f370/opendal-0.45.20-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:3bbdfcb6840ab8bbd29c36a2a329c1f691023b3cd6a26f8a285dc89f39526017" }, + { url = "https://mirrors.aliyun.com/pypi/packages/50/28/bb822cad3f3ef15836227751dad46554c499bbefcf0eb34b4cc7e9975e9b/opendal-0.45.20-cp310-cp310-win_amd64.whl", hash = "sha256:e3987c4766a3611ea8cb3a216f21d083ac3e7fa91eb2ff7c0ebe5dc6e6958cce" }, + { url = "https://mirrors.aliyun.com/pypi/packages/84/77/6427e16b8630f0cc71f4a1b01648ed3264f1e04f1f6d9b5d09e5c6a4dd2f/opendal-0.45.20-cp311-abi3-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:35acdd8001e4a741532834fdbff3020ffb10b40028bb49fbe93c4f8197d66d8c" }, + { url = "https://mirrors.aliyun.com/pypi/packages/12/1f/83e415334739f1ab4dba55cdd349abf0b66612249055afb422a354b96ac8/opendal-0.45.20-cp311-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:629bfe8d384364bced6cbeb01f49b99779fa5151c68048a1869ff645ddcfcb25" }, + { url = "https://mirrors.aliyun.com/pypi/packages/49/94/c5de6ed54a02d7413636c2ccefa71d8dd09c2ada1cd6ecab202feb1fdeda/opendal-0.45.20-cp311-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d12cc5ac7e441fb93d86d1673112d9fb08580fc3226f864434f4a56a72efec53" }, + { url = "https://mirrors.aliyun.com/pypi/packages/c6/83/713a1e1de8cbbd69af50e26644bbdeef3c1068b89f442417376fa3c0f591/opendal-0.45.20-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:45a3adae1f473052234fc4054a6f210df3ded9aff10db8d545d0a37eff3b13cc" }, + { url = "https://mirrors.aliyun.com/pypi/packages/c7/78/c9651e753aaf6eb61887ca372a3f9c2ae57dae03c3159d24deaf018c26dc/opendal-0.45.20-cp311-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:d8947857052c85a4b0e251d50e23f5f68f0cdd9e509e32e614a5e4b2fc7424c4" }, + { url = "https://mirrors.aliyun.com/pypi/packages/3c/9d/5d8c20c0fc93df5e349e5694167de30afdc54c5755704cc64764a6cbb309/opendal-0.45.20-cp311-abi3-musllinux_1_1_armv7l.whl", hash = "sha256:891d2f9114efeef648973049ed15e56477e8feb9e48b540bd8d6105ea22a253c" }, + { url = "https://mirrors.aliyun.com/pypi/packages/21/39/05262f748a2085522e0c85f03eab945589313dc9caedc002872c39162776/opendal-0.45.20-cp311-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:539de9b825f6783d6289d88c0c9ac5415daa4d892d761e3540c565bda51e8997" }, + { url = "https://mirrors.aliyun.com/pypi/packages/74/83/cc7c6de29b0a7585cd445258d174ca204d37729c3874ad08e515b0bf331c/opendal-0.45.20-cp311-abi3-win_amd64.whl", hash = "sha256:145efd56aa33b493d5b652c3e4f5ae5097ab69d38c132d80f108e9f5c1e4d863" }, +] + [[package]] name = "openpyxl" version = "3.1.5" @@ -4891,6 +4915,7 @@ dependencies = [ { name = "openai" }, { name = "opencv-python" }, { name = "opencv-python-headless" }, + { name = "opendal" }, { name = "openpyxl" }, { name = "opensearch-py" }, { name = "ormsgpack" }, @@ -5041,6 +5066,7 @@ requires-dist = [ { name = "openai", specifier = "==1.45.0" }, { name = "opencv-python", specifier = "==4.10.0.84" }, { name = "opencv-python-headless", specifier = "==4.10.0.84" }, + { name = "opendal", specifier = ">=0.45.0,<0.46.0" }, { name = "openpyxl", specifier = ">=3.1.0,<4.0.0" }, { name = "opensearch-py", specifier = "==2.7.1" }, { name = "ormsgpack", specifier = "==1.5.0" },