From 1c7d0599f5e888512ce2941f8cbeaee6ef9e7711 Mon Sep 17 00:00:00 2001 From: buua436 Date: Tue, 9 Dec 2025 17:16:37 +0800 Subject: [PATCH] update --- api/utils/api_utils.py | 1 + deepdoc/parser/pdf_parser.py | 1 + deepdoc/vision/t_ocr.py | 2 ++ graphrag/entity_resolution.py | 2 ++ graphrag/general/extractor.py | 2 ++ graphrag/general/mind_map_extractor.py | 1 + graphrag/utils.py | 1 + rag/flow/parser/parser.py | 2 ++ rag/flow/splitter/splitter.py | 2 ++ rag/svr/task_executor.py | 3 +++ 10 files changed, 17 insertions(+) diff --git a/api/utils/api_utils.py b/api/utils/api_utils.py index 53cb1ce02..6518e9c61 100644 --- a/api/utils/api_utils.py +++ b/api/utils/api_utils.py @@ -707,6 +707,7 @@ async def is_strong_enough(chat_model, embedding_model): try: await asyncio.gather(*tasks, return_exceptions=False) except Exception as e: + logging.error(f"Pressure test failed: {e}") for t in tasks: t.cancel() await asyncio.gather(*tasks, return_exceptions=True) diff --git a/deepdoc/parser/pdf_parser.py b/deepdoc/parser/pdf_parser.py index 6b8a75a8d..4fd16df71 100644 --- a/deepdoc/parser/pdf_parser.py +++ b/deepdoc/parser/pdf_parser.py @@ -1150,6 +1150,7 @@ class RAGFlowPdfParser: try: await asyncio.gather(*tasks, return_exceptions=False) except Exception as e: + logging.error(f"Error in OCR: {e}") for t in tasks: t.cancel() await asyncio.gather(*tasks, return_exceptions=True) diff --git a/deepdoc/vision/t_ocr.py b/deepdoc/vision/t_ocr.py index 347a3e9a8..d3b33b122 100644 --- a/deepdoc/vision/t_ocr.py +++ b/deepdoc/vision/t_ocr.py @@ -15,6 +15,7 @@ # import asyncio +import logging import os import sys sys.path.insert( @@ -78,6 +79,7 @@ def main(args): try: await asyncio.gather(*tasks, return_exceptions=False) except Exception as e: + logging.error("OCR tasks failed: {}".format(e)) for t in tasks: t.cancel() await asyncio.gather(*tasks, return_exceptions=True) diff --git a/graphrag/entity_resolution.py b/graphrag/entity_resolution.py index 9e99fe941..d81cfaf83 100644 --- 
a/graphrag/entity_resolution.py +++ b/graphrag/entity_resolution.py @@ -146,6 +146,7 @@ class EntityResolution(Extractor): try: await asyncio.gather(*tasks, return_exceptions=False) except Exception as e: + logging.error(f"Error resolving candidate pairs: {e}") for t in tasks: t.cancel() await asyncio.gather(*tasks, return_exceptions=True) @@ -169,6 +170,7 @@ class EntityResolution(Extractor): try: await asyncio.gather(*tasks, return_exceptions=False) except Exception as e: + logging.error(f"Error merging nodes: {e}") for t in tasks: t.cancel() await asyncio.gather(*tasks, return_exceptions=True) diff --git a/graphrag/general/extractor.py b/graphrag/general/extractor.py index 8549985e6..2d2f2b33d 100644 --- a/graphrag/general/extractor.py +++ b/graphrag/general/extractor.py @@ -192,6 +192,7 @@ class Extractor: try: await asyncio.gather(*tasks, return_exceptions=False) except Exception as e: + logging.error(f"Error merging nodes: {e}") for t in tasks: t.cancel() await asyncio.gather(*tasks, return_exceptions=True) @@ -221,6 +222,7 @@ class Extractor: try: await asyncio.gather(*tasks, return_exceptions=False) except Exception as e: + logging.error(f"Error during relationships merging: {e}") for t in tasks: t.cancel() await asyncio.gather(*tasks, return_exceptions=True) diff --git a/graphrag/general/mind_map_extractor.py b/graphrag/general/mind_map_extractor.py index f944aec98..3988b5bc7 100644 --- a/graphrag/general/mind_map_extractor.py +++ b/graphrag/general/mind_map_extractor.py @@ -108,6 +108,7 @@ class MindMapExtractor(Extractor): try: await asyncio.gather(*tasks, return_exceptions=False) except Exception as e: + logging.error(f"Error processing document: {e}") for t in tasks: t.cancel() await asyncio.gather(*tasks, return_exceptions=True) diff --git a/graphrag/utils.py b/graphrag/utils.py index a39bdd2d7..923fe8b53 100644 --- a/graphrag/utils.py +++ b/graphrag/utils.py @@ -479,6 +479,7 @@ async def set_graph(tenant_id: str, kb_id: str, embd_mdl, graph: 
nx.Graph, chang try: await asyncio.gather(*tasks, return_exceptions=False) except Exception as e: + logging.error(f"Error while deleting edges: {e}") for t in tasks: t.cancel() await asyncio.gather(*tasks, return_exceptions=True) diff --git a/rag/flow/parser/parser.py b/rag/flow/parser/parser.py index 1d1c199aa..2c4e68f14 100644 --- a/rag/flow/parser/parser.py +++ b/rag/flow/parser/parser.py @@ -20,6 +20,7 @@ import random import re from functools import partial +import logging import numpy as np from PIL import Image @@ -819,6 +820,7 @@ class Parser(ProcessBase): try: await asyncio.gather(*tasks, return_exceptions=False) except Exception as e: + logging.error("Error while parsing: %s" % e) for t in tasks: t.cancel() await asyncio.gather(*tasks, return_exceptions=True) diff --git a/rag/flow/splitter/splitter.py b/rag/flow/splitter/splitter.py index 851d880d4..e0174800f 100644 --- a/rag/flow/splitter/splitter.py +++ b/rag/flow/splitter/splitter.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import asyncio +import logging import random import re from copy import deepcopy @@ -135,6 +136,7 @@ class Splitter(ProcessBase): try: await asyncio.gather(*tasks, return_exceptions=False) except Exception as e: + logging.error(f"error when splitting: {e}") for t in tasks: t.cancel() await asyncio.gather(*tasks, return_exceptions=True) diff --git a/rag/svr/task_executor.py b/rag/svr/task_executor.py index 3a6d64d8c..3283b0c62 100644 --- a/rag/svr/task_executor.py +++ b/rag/svr/task_executor.py @@ -339,6 +339,7 @@ async def build_chunks(task, progress_callback): try: await asyncio.gather(*tasks, return_exceptions=False) except Exception as e: + logging.error("Error in doc_keyword_extraction: {}".format(e)) for t in tasks: t.cancel() await asyncio.gather(*tasks, return_exceptions=True) @@ -370,6 +371,7 @@ async def build_chunks(task, progress_callback): try: await asyncio.gather(*tasks, return_exceptions=False) except Exception as e: + logging.error("Error in doc_question_proposal", exc_info=e) for t in tasks: t.cancel() await asyncio.gather(*tasks, return_exceptions=True) @@ -430,6 +432,7 @@ async def build_chunks(task, progress_callback): try: await asyncio.gather(*tasks, return_exceptions=False) except Exception as e: + logging.error("Error tagging docs: {}".format(e)) for t in tasks: t.cancel() await asyncio.gather(*tasks, return_exceptions=True)