diff --git a/lightrag/kg/milvus_impl.py b/lightrag/kg/milvus_impl.py
index ef73a206..a6e1b66e 100644
--- a/lightrag/kg/milvus_impl.py
+++ b/lightrag/kg/milvus_impl.py
@@ -47,7 +47,7 @@ class MilvusVectorDBStorage(BaseVectorStorage):
                 FieldSchema(
                     name="file_path",
                     dtype=DataType.VARCHAR,
-                    max_length=1024,
+                    max_length=4090,
                     nullable=True,
                 ),
             ]
@@ -64,7 +64,7 @@ class MilvusVectorDBStorage(BaseVectorStorage):
                 FieldSchema(
                     name="file_path",
                     dtype=DataType.VARCHAR,
-                    max_length=1024,
+                    max_length=4090,
                     nullable=True,
                 ),
             ]
diff --git a/lightrag/operate.py b/lightrag/operate.py
index a3075210..78e11c22 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -968,16 +968,7 @@ async def _merge_nodes_then_upsert(
     source_id = GRAPH_FIELD_SEP.join(
         set([dp["source_id"] for dp in nodes_data] + already_source_ids)
     )
-    file_path = GRAPH_FIELD_SEP.join(
-        set(
-            [
-                dp.get("file_path", "unknown_source")
-                for dp in nodes_data
-                if dp.get("file_path")
-            ]
-            + [fp for fp in already_file_paths if fp]
-        )
-    )
+    file_path = build_file_path(already_file_paths, nodes_data, entity_name)
 
     force_llm_summary_on_merge = global_config["force_llm_summary_on_merge"]
 
@@ -3141,6 +3132,44 @@ async def kg_query_with_keywords(
     return response
 
 
+def build_file_path(already_file_paths, data_list, target, max_length=4090):
+    """Merge stored and newly extracted file paths into one bounded string.
+
+    Args:
+        already_file_paths: stored paths; always kept, minus empties and duplicates.
+        data_list: dicts whose optional "file_path" value is a candidate path.
+        target: name of the item being merged; only used in the warning message.
+        max_length: UTF-8 byte budget the joined result must stay below.
+
+    Returns:
+        Unique paths in first-seen order, joined with GRAPH_FIELD_SEP.
+    """
+    # dict.fromkeys drops duplicates while preserving the stored order
+    kept = list(dict.fromkeys(fp for fp in already_file_paths if fp))
+    seen = set(kept)
+    # budget in bytes, not characters: non-ASCII paths encode to several bytes each
+    sep_size = len(GRAPH_FIELD_SEP.encode("utf-8"))
+    used = len(GRAPH_FIELD_SEP.join(kept).encode("utf-8"))
+    ignored = []
+    for dp in data_list:
+        cur_file_path = dp.get("file_path")
+        if not cur_file_path or cur_file_path in seen:
+            continue
+        # rejected paths are remembered too, so each is reported only once
+        seen.add(cur_file_path)
+        # the separator only costs something when a path already precedes this one
+        cost = len(cur_file_path.encode("utf-8")) + (sep_size if kept else 0)
+        if used + cost < max_length:
+            kept.append(cur_file_path)
+            used += cost
+        else:
+            ignored.append(cur_file_path)
+    if ignored:
+        skipped = GRAPH_FIELD_SEP.join(ignored)
+        logger.warning(f"file_path of {target} exceeds {max_length} bytes, ignoring: {skipped}")
+    return GRAPH_FIELD_SEP.join(kept)
+
+
 # TODO: Deprecated, use user_prompt in QueryParam instead
 async def query_with_keywords(
     query: str,