diff --git a/rag/ppdet/benchmark_utils.py b/rag/ppdet/benchmark_utils.py index adf362179..118e5efe1 100644 --- a/rag/ppdet/benchmark_utils.py +++ b/rag/ppdet/benchmark_utils.py @@ -158,6 +158,7 @@ class PaddleInferBenchmark(object): return: config_status(dict): dict style config info """ + if isinstance(config, paddle_infer.Config): config_status = {} config_status['runtime_device'] = "gpu" if config.use_gpu( diff --git a/rag/ppdet/clrnet_postprocess.py b/rag/ppdet/clrnet_postprocess.py index 8e59e9c02..b689ea4da 100644 --- a/rag/ppdet/clrnet_postprocess.py +++ b/rag/ppdet/clrnet_postprocess.py @@ -15,7 +15,6 @@ import numpy as np import paddle import paddle.nn as nn -from scipy.special import softmax from scipy.interpolate import InterpolatedUnivariateSpline diff --git a/rag/ppdet/det_keypoint_unite_infer.py b/rag/ppdet/det_keypoint_unite_infer.py index 7b57714d1..1ed93cd89 100644 --- a/rag/ppdet/det_keypoint_unite_infer.py +++ b/rag/ppdet/det_keypoint_unite_infer.py @@ -26,6 +26,7 @@ from infer import Detector, DetectorPicoDet, PredictConfig, print_arguments, get from keypoint_infer import KeyPointDetector, PredictConfig_KeyPoint from visualize import visualize_pose from benchmark_utils import PaddleInferBenchmark + from utils import get_current_memory_mb from keypoint_postprocess import translate_to_ori_images diff --git a/rag/ppdet/infer.py b/rag/ppdet/infer.py index a5ebe975d..fcdc8a2c3 100644 --- a/rag/ppdet/infer.py +++ b/rag/ppdet/infer.py @@ -19,6 +19,7 @@ import json from pathlib import Path from functools import reduce + import cv2 import numpy as np import math @@ -28,6 +29,7 @@ from paddle.inference import create_predictor import sys # add deploy path of PaddleDetection to sys.path + parent_path = os.path.abspath(os.path.join(__file__, *(['..']))) sys.path.insert(0, parent_path) diff --git a/rag/ppdet/keypoint_infer.py b/rag/ppdet/keypoint_infer.py index fc912bf53..2c80ff484 100644 --- a/rag/ppdet/keypoint_infer.py +++ b/rag/ppdet/keypoint_infer.py @@ -19,6 +19,7 @@ import glob from functools import reduce from PIL import Image + import cv2 import math import numpy as np @@ -35,6 +36,7 @@ from keypoint_postprocess import HrHRNetPostProcess, HRNetPostProcess from visualize import visualize_pose from paddle.inference import Config from paddle.inference import create_predictor + from utils import argsparser, Timer, get_current_memory_mb from benchmark_utils import PaddleInferBenchmark from infer import Detector, get_test_images, print_arguments diff --git a/rag/ppdet/mot_centertrack_infer.py b/rag/ppdet/mot_centertrack_infer.py index 3442ef534..af28d6403 100644 --- a/rag/ppdet/mot_centertrack_infer.py +++ b/rag/ppdet/mot_centertrack_infer.py @@ -371,6 +371,7 @@ class CenterTrack(Detector): online_scores, frame_id=frame_id, ids2names=ids2names) + if seq_name is None: seq_name = image_list[0].split('/')[-2] save_dir = os.path.join(self.output_dir, seq_name) @@ -442,12 +443,12 @@ class CenterTrack(Detector): if cv2.waitKey(1) & 0xFF == ord('q'): break + if self.save_mot_txts: result_filename = os.path.join( self.output_dir, video_out_name.split('.')[-2] + '.txt') write_mot_results(result_filename, results, data_type, num_classes) - writer.release() diff --git a/rag/ppdet/mot_jde_infer.py b/rag/ppdet/mot_jde_infer.py index 793d5271b..4d1e6fe82 100644 --- a/rag/ppdet/mot_jde_infer.py +++ b/rag/ppdet/mot_jde_infer.py @@ -114,6 +114,7 @@ class JDE_Detector(Detector): tracked_thresh=tracked_thresh, metric_type=metric_type) + def postprocess(self, inputs, result): # postprocess output of predictor np_boxes = result['pred_dets'] @@ -247,6 +248,7 @@ class JDE_Detector(Detector): online_scores, frame_id=frame_id, ids2names=ids2names) + if seq_name is None: seq_name = image_list[0].split('/')[-2] save_dir = os.path.join(self.output_dir, seq_name) @@ -255,6 +257,7 @@ class JDE_Detector(Detector): cv2.imwrite( os.path.join(save_dir, '{:05d}.jpg'.format(frame_id)), im) + mot_results.append([online_tlwhs, online_scores, online_ids]) return mot_results diff --git a/rag/ppdet/mot_keypoint_unite_infer.py b/rag/ppdet/mot_keypoint_unite_infer.py index d69622b1a..d129ac73b 100644 --- a/rag/ppdet/mot_keypoint_unite_infer.py +++ b/rag/ppdet/mot_keypoint_unite_infer.py @@ -16,6 +16,7 @@ import os import json import cv2 import math + import numpy as np import paddle import yaml diff --git a/rag/svr/parse_user_docs.py b/rag/svr/parse_user_docs.py index 4a22c4156..188662e6b 100644 --- a/rag/svr/parse_user_docs.py +++ b/rag/svr/parse_user_docs.py @@ -28,6 +28,7 @@ from rag.settings import cron_logger, DOC_MAXIMUM_SIZE from rag.utils import ELASTICSEARCH from rag.utils import MINIO from rag.utils import rmSpace, findMaxTm + from rag.nlp import huchunk, huqie, search from io import BytesIO import pandas as pd @@ -106,18 +107,18 @@ def build(row, cvmdl): set_progress(row["id"], -1, "File size exceeds( <= %dMb )" % (int(DOC_MAXIMUM_SIZE / 1024 / 1024))) return [] - # If just change the kb for doc - # res = ELASTICSEARCH.search(Q("term", doc_id=row["id"]), idxnm=search.index_name(row["tenant_id"])) - # if ELASTICSEARCH.getTotal(res) > 0: - # ELASTICSEARCH.updateScriptByQuery(Q("term", doc_id=row["id"]), - # scripts=""" - # if(!ctx._source.kb_id.contains('%s')) - # ctx._source.kb_id.add('%s'); - # """ % (str(row["kb_id"]), str(row["kb_id"])), - # idxnm=search.index_name(row["tenant_id"]) - # ) - # set_progress(row["id"], 1, "Done") - # return [] + + res = ELASTICSEARCH.search(Q("term", doc_id=row["id"])) + if ELASTICSEARCH.getTotal(res) > 0: + ELASTICSEARCH.updateScriptByQuery(Q("term", doc_id=row["id"]), + scripts=""" + if(!ctx._source.kb_id.contains('%s')) + ctx._source.kb_id.add('%s'); + """ % (str(row["kb_id"]), str(row["kb_id"])), + idxnm=search.index_name(row["tenant_id"]) + ) + set_progress(row["id"], 1, "Done") + return [] random.seed(time.time()) set_progress(row["id"], random.randint(0, 20) / @@ -135,7 +136,9 @@ def build(row, cvmdl): row["id"], -1, f"Internal server error: %s" % str(e).replace( "'", "")) + cron_logger.warn("Chunkking {}/{}: {}".format(row["location"], row["name"], str(e))) + return [] if not obj.text_chunks and not obj.table_chunks: diff --git a/rag/utils/__init__.py b/rag/utils/__init__.py index 965c56a54..9898d19d5 100644 --- a/rag/utils/__init__.py +++ b/rag/utils/__init__.py @@ -40,7 +40,7 @@ def findMaxDt(fnm): print("WARNING: can't find " + fnm) return m - + def findMaxTm(fnm): m = 0 try: @@ -58,6 +58,7 @@ def findMaxTm(fnm): print("WARNING: can't find " + fnm) return m + def num_tokens_from_string(string: str) -> int: """Returns the number of tokens in a text string.""" encoding = tiktoken.get_encoding('cl100k_base') diff --git a/web_server/apps/document_app.py b/web_server/apps/document_app.py index 67bdcbc30..9be9cfde9 100644 --- a/web_server/apps/document_app.py +++ b/web_server/apps/document_app.py @@ -276,4 +276,5 @@ def change_parser(): return get_json_result(data=True) except Exception as e: - return server_error_response(e) \ No newline at end of file + return server_error_response(e) + diff --git a/web_server/apps/user_app.py b/web_server/apps/user_app.py index 33d851b1c..81946074e 100644 --- a/web_server/apps/user_app.py +++ b/web_server/apps/user_app.py @@ -183,7 +183,9 @@ def rollback_user_registration(user_id): except Exception as e: pass + def user_register(user_id, user): + user_id = get_uuid() user["id"] = user_id tenant = { diff --git a/web_server/db/db_models.py b/web_server/db/db_models.py index aed36f8f1..62d92b475 100644 --- a/web_server/db/db_models.py +++ b/web_server/db/db_models.py @@ -467,6 +467,7 @@ class Knowledgebase(DataBaseModel): doc_num = IntegerField(default=0) token_num = IntegerField(default=0) chunk_num = IntegerField(default=0) + parser_id = CharField(max_length=32, null=False, help_text="default parser ID") status = CharField(max_length=1, null=True, help_text="is it validate(0: wasted,1: validate)", default="1") diff --git a/web_server/db/services/document_service.py b/web_server/db/services/document_service.py index e9e325bbc..38b1cd559 100644 --- a/web_server/db/services/document_service.py +++ b/web_server/db/services/document_service.py @@ -85,4 +85,5 @@ class DocumentService(CommonService): cls.model.id == doc_id).execute() if num == 0:raise LookupError("Document not found which is supposed to be there") num = Knowledgebase.update(token_num=Knowledgebase.token_num+token_num, chunk_num=Knowledgebase.chunk_num+chunk_num).where(Knowledgebase.id==kb_id).execute() - return num \ No newline at end of file + return num + diff --git a/web_server/db/services/llm_service.py b/web_server/db/services/llm_service.py index ea67c539c..350106e36 100644 --- a/web_server/db/services/llm_service.py +++ b/web_server/db/services/llm_service.py @@ -31,7 +31,6 @@ class LLMService(CommonService): model = LLM - class TenantLLMService(CommonService): model = TenantLLM @@ -51,3 +50,4 @@ class TenantLLMService(CommonService): if not objs:return return objs[0] +