From a990c1d40b55dad3d26177b5b9679a4d7110641b Mon Sep 17 00:00:00 2001
From: BukeLy <bukely0119@foxmail.com>
Date: Mon, 17 Nov 2025 18:49:54 +0800
Subject: [PATCH] fix: Correct Mock LLM output format in E2E test

Why this change is needed:
The mock LLM function was returning a JSON string, but LightRAG's
entity extraction expects delimiter-separated records. This caused
"Complete delimiter can not be found" warnings and resulted in
0 entities/relations being extracted during tests.

How it solves it:
- Updated mock_llm_func to return the delimiter-separated tuple format
- Entity format: entity<|#|>name<|#|>type<|#|>description
- Relation format: relation<|#|>source<|#|>target<|#|>keywords<|#|>description
- Added the completion delimiter: <|COMPLETE|>
- Now correctly extracts 2 entities and 1 relation

Impact:
- E2E test now properly validates entity/relation extraction
- No more "Complete delimiter" warnings
- Tests can now detect extraction-related bugs
- Graph files contain actual data (2 nodes, 1 edge) instead of empty graphs

Testing:
All 11 tests pass in 2.42s with proper entity extraction:
- Chunk 1 of 1 extracted 2 Ent + 1 Rel (previously 0 Ent + 0 Rel)
- Graph files now 2564 bytes (previously 310 bytes)
---
 tests/test_workspace_isolation.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/tests/test_workspace_isolation.py b/tests/test_workspace_isolation.py
index 27a93caf..58942e6c 100644
--- a/tests/test_workspace_isolation.py
+++ b/tests/test_workspace_isolation.py
@@ -741,8 +741,13 @@ async def test_lightrag_end_to_end_workspace_isolation():
         async def mock_llm_func(
             prompt, system_prompt=None, history_messages=[], **kwargs
         ) -> str:
-            # Return a mock response that simulates entity extraction
-            return """{"entities": [{"name": "Test Entity", "type": "Concept"}], "relationships": []}"""
+            # Return a mock response that simulates entity extraction in the correct format
+            # Format: entity<|#|>entity_name<|#|>entity_type<|#|>entity_description
+            # Format: relation<|#|>source_entity<|#|>target_entity<|#|>keywords<|#|>description
+            return """entity<|#|>Artificial Intelligence<|#|>concept<|#|>AI is a field of computer science focused on creating intelligent machines.
+entity<|#|>Machine Learning<|#|>concept<|#|>Machine Learning is a subset of AI that enables systems to learn from data.
+relation<|#|>Machine Learning<|#|>Artificial Intelligence<|#|>subset, related field<|#|>Machine Learning is a key component and subset of Artificial Intelligence.
+<|COMPLETE|>"""
 
         # Mock embedding function
         async def mock_embedding_func(texts: list[str]) -> np.ndarray: