diff --git a/lightrag/constants.py b/lightrag/constants.py index 26205689..a3773f63 100644 --- a/lightrag/constants.py +++ b/lightrag/constants.py @@ -26,6 +26,9 @@ DEFAULT_RELATED_CHUNK_NUMBER = 5 # Separator for graph fields GRAPH_FIELD_SEP = "" +# File path configuration for vector and graph database +DEFAULT_MAX_FILE_PATH_LENGTH = 4090 + # Logging configuration defaults DEFAULT_LOG_MAX_BYTES = 10485760 # Default 10MB DEFAULT_LOG_BACKUP_COUNT = 5 # Default 5 backups diff --git a/lightrag/kg/milvus_impl.py b/lightrag/kg/milvus_impl.py index a6e1b66e..55e5f9eb 100644 --- a/lightrag/kg/milvus_impl.py +++ b/lightrag/kg/milvus_impl.py @@ -5,6 +5,7 @@ from dataclasses import dataclass import numpy as np from lightrag.utils import logger, compute_mdhash_id from ..base import BaseVectorStorage +from ..constants import DEFAULT_MAX_FILE_PATH_LENGTH import pipmaster as pm if not pm.is_installed("pymilvus"): @@ -47,7 +48,7 @@ class MilvusVectorDBStorage(BaseVectorStorage): FieldSchema( name="file_path", dtype=DataType.VARCHAR, - max_length=4090, + max_length=DEFAULT_MAX_FILE_PATH_LENGTH, nullable=True, ), ] @@ -64,7 +65,7 @@ class MilvusVectorDBStorage(BaseVectorStorage): FieldSchema( name="file_path", dtype=DataType.VARCHAR, - max_length=4090, + max_length=DEFAULT_MAX_FILE_PATH_LENGTH, nullable=True, ), ] diff --git a/lightrag/operate.py b/lightrag/operate.py index 78e11c22..d25a899f 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -43,6 +43,7 @@ from .constants import ( DEFAULT_MAX_RELATION_TOKENS, DEFAULT_MAX_TOTAL_TOKENS, DEFAULT_RELATED_CHUNK_NUMBER, + DEFAULT_MAX_FILE_PATH_LENGTH, ) from .kg.shared_storage import get_storage_keyed_lock import time @@ -3154,7 +3155,10 @@ def build_file_path(already_file_paths, data_list, target): file_paths_set.add(cur_file_path) # check the length - if len(file_paths) + len(GRAPH_FIELD_SEP + cur_file_path) < 4090: + if ( + len(file_paths) + len(GRAPH_FIELD_SEP + cur_file_path) + < DEFAULT_MAX_FILE_PATH_LENGTH + ): # append file_paths += ( GRAPH_FIELD_SEP + cur_file_path if file_paths else cur_file_path