Add support for Chinese book title marks in normalize_extracted_info

2025-09-04 18:51:57 +08:00 · 2025-09-04 18:51:57 +08:00 · 2c551cb5db
commit 2c551cb5db
parent ae65676b4e
1 changed file with 6 additions and 0 deletions
--- a/lightrag/utils.py
+++ b/lightrag/utils.py
@@ -1896,6 +1896,12 @@ def normalize_extracted_info(name: str, remove_inner_quotes=False) -> str:
            if "‘" not in inner_content and "’" not in inner_content:
                name = inner_content

+        # Handle Chinese-style book title mark
+        if name.startswith("《") and name.endswith("》"):
+            inner_content = name[1:-1]
+            if "《" not in inner_content and "》" not in inner_content:
+                name = inner_content
+
    if remove_inner_quotes:
        # Remove Chinese quotes
        name = name.replace("“", "").replace("”", "").replace("‘", "").replace("’", "")