Merge branch 'separator_file_path'
This commit is contained in:
commit
8e7014d366
2 changed files with 41 additions and 12 deletions
|
|
@ -47,7 +47,7 @@ class MilvusVectorDBStorage(BaseVectorStorage):
|
|||
FieldSchema(
|
||||
name="file_path",
|
||||
dtype=DataType.VARCHAR,
|
||||
max_length=1024,
|
||||
max_length=4090,
|
||||
nullable=True,
|
||||
),
|
||||
]
|
||||
|
|
@ -64,7 +64,7 @@ class MilvusVectorDBStorage(BaseVectorStorage):
|
|||
FieldSchema(
|
||||
name="file_path",
|
||||
dtype=DataType.VARCHAR,
|
||||
max_length=1024,
|
||||
max_length=4090,
|
||||
nullable=True,
|
||||
),
|
||||
]
|
||||
|
|
|
|||
|
|
@ -968,16 +968,7 @@ async def _merge_nodes_then_upsert(
|
|||
source_id = GRAPH_FIELD_SEP.join(
|
||||
set([dp["source_id"] for dp in nodes_data] + already_source_ids)
|
||||
)
|
||||
file_path = GRAPH_FIELD_SEP.join(
|
||||
set(
|
||||
[
|
||||
dp.get("file_path", "unknown_source")
|
||||
for dp in nodes_data
|
||||
if dp.get("file_path")
|
||||
]
|
||||
+ [fp for fp in already_file_paths if fp]
|
||||
)
|
||||
)
|
||||
file_path = build_file_path(already_file_paths, nodes_data, entity_name)
|
||||
|
||||
force_llm_summary_on_merge = global_config["force_llm_summary_on_merge"]
|
||||
|
||||
|
|
@ -3141,6 +3132,44 @@ async def kg_query_with_keywords(
|
|||
return response
|
||||
|
||||
|
||||
def build_file_path(already_file_paths, data_list, target, max_length=4090):
    """Merge known and newly seen file paths into one separator-joined string.

    Paths from ``already_file_paths`` come first, in their original order,
    followed by each not-yet-seen ``file_path`` found in ``data_list``. Empty
    values are dropped and every path appears at most once. A new path is
    appended only while the UTF-8 encoded size of the result stays strictly
    below ``max_length``; paths that do not fit are left out and reported in
    a single warning.

    Args:
        already_file_paths: Iterable of path strings already stored for the
            target; falsy entries are ignored. These are never truncated.
        data_list: Iterable of dicts; the optional ``"file_path"`` key of each
            one is a candidate path to add.
        target: Name used only in the warning message to identify what the
            paths belong to.
        max_length: Exclusive upper bound, in UTF-8 bytes, for the returned
            string once new paths are appended. Defaults to 4090, the
            ``max_length`` given to the ``file_path`` VARCHAR field.

    Returns:
        The paths joined with ``GRAPH_FIELD_SEP``; an empty string when there
        is nothing to join.
    """
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # joined string and the membership set can never disagree.
    kept_paths = list(dict.fromkeys(fp for fp in already_file_paths if fp))
    seen_paths = set(kept_paths)

    # The size budget is tracked in encoded bytes rather than characters:
    # multi-byte characters would otherwise slip past a character-count check.
    sep_bytes = len(GRAPH_FIELD_SEP.encode("utf-8"))
    used_bytes = len(GRAPH_FIELD_SEP.join(kept_paths).encode("utf-8"))

    ignored_paths = []
    for dp in data_list:
        cur_file_path = dp.get("file_path")
        if not cur_file_path or cur_file_path in seen_paths:
            continue
        # Remember the path even if it ends up ignored, so a repeat of it
        # later in data_list is not reported twice.
        seen_paths.add(cur_file_path)

        # A separator is only needed (and only counted) once something
        # precedes the new path.
        cost = len(cur_file_path.encode("utf-8"))
        if kept_paths:
            cost += sep_bytes

        if used_bytes + cost < max_length:
            kept_paths.append(cur_file_path)
            used_bytes += cost
        else:
            ignored_paths.append(cur_file_path)

    if ignored_paths:
        logger.warning(
            "File paths for %s exceed %d bytes, ignoring new file: %s",
            target,
            max_length,
            GRAPH_FIELD_SEP.join(ignored_paths),
        )
    return GRAPH_FIELD_SEP.join(kept_paths)
|
||||
|
||||
|
||||
# TODO: Deprecated, use user_prompt in QueryParam instead
|
||||
async def query_with_keywords(
|
||||
query: str,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue