diff --git a/rag/nlp/search.py b/rag/nlp/search.py
index 1ca70f678..a7e9189e0 100644
--- a/rag/nlp/search.py
+++ b/rag/nlp/search.py
@@ -389,6 +389,9 @@ class Dealer:
             "topk": top,
             "similarity": similarity_threshold,
             "available_int": 1,
+            "fields":["docnm_kwd", "content_ltks", "kb_id", "img_id", "title_tks", "important_kwd", "position_int",
+                      "doc_id", "page_num_int", "top_int", "create_timestamp_flt", "knowledge_graph_kwd",
+                      "question_kwd", "question_tks", "doc_type_kwd","available_int", "content_with_weight","mom_id", PAGERANK_FLD, TAG_FLD]
         }
 
         if isinstance(tenant_ids, str):
@@ -469,6 +472,7 @@ class Dealer:
                 "vector": chunk.get(vector_column, zero_vector),
                 "positions": position_int,
                 "doc_type_kwd": chunk.get("doc_type_kwd", ""),
+                "mom_id": chunk.get("mom_id", ""),
             }
             if highlight and sres.highlight:
                 if id in sres.highlight:
diff --git a/rag/svr/task_executor.py b/rag/svr/task_executor.py
index b08aa7524..62693f24f 100644
--- a/rag/svr/task_executor.py
+++ b/rag/svr/task_executor.py
@@ -727,17 +727,17 @@ async def insert_es(task_id, task_tenant_id, task_dataset_id, chunks, progress_c
             if not mom:
                 continue
             id = xxhash.xxh64(mom.encode("utf-8")).hexdigest()
+            ck["mom_id"] = id
             if id in mother_ids:
                 continue
             mother_ids.add(id)
-            ck["mom_id"] = id
             mom_ck = copy.deepcopy(ck)
             mom_ck["id"] = id
             mom_ck["content_with_weight"] = mom
             mom_ck["available_int"] = 0
             flds = list(mom_ck.keys())
             for fld in flds:
-                if fld not in ["id", "content_with_weight", "doc_id", "kb_id", "available_int", "position_int"]:
+                if fld not in ["id", "content_with_weight", "doc_id", "docnm_kwd", "kb_id", "available_int", "position_int"]:
                     del mom_ck[fld]
             mothers.append(mom_ck)
 