cherry-pick f6d1fb98
This commit is contained in:
parent fd109cdfcf
commit aa12830be4
2 changed files with 150 additions and 20 deletions
@@ -264,20 +264,24 @@ def create_graph_routes(rag, api_key: Optional[str] = None):
         }
         """
         try:
-            # Use the proper acreate_entity method which handles:
-            # - Graph lock for concurrency
-            # - Vector embedding creation in entities_vdb
-            # - Metadata population and defaults
-            # - Index consistency via _edit_entity_done
-            result = await rag.acreate_entity(
-                entity_name=request.entity_name,
-                entity_data=request.entity_data,
-            )
+            # Check if entity already exists
+            exists = await rag.chunk_entity_relation_graph.has_node(request.entity_name)
+            if exists:
+                raise ValueError(f"Entity '{request.entity_name}' already exists")
+
+            # Prepare entity data
+            entity_data = request.entity_data.copy()
+            entity_data["entity_id"] = request.entity_name
+
+            # Create the entity
+            await rag.chunk_entity_relation_graph.upsert_node(
+                request.entity_name, entity_data
+            )

             return {
                 "status": "success",
                 "message": f"Entity '{request.entity_name}' created successfully",
-                "data": result,
+                "data": entity_data,
             }
         except ValueError as ve:
             logger.error(
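For reference, a minimal client-side sketch of exercising the entity-creation handler after this change. The base URL and route path are assumptions (the diff does not show the route decorator); only the request fields entity_name and entity_data are taken from the code above.

# Hypothetical request; host and path are assumed, not taken from the diff.
import requests

resp = requests.post(
    "http://localhost:8000/graph/entity",  # assumed address and route
    json={
        "entity_name": "Alan Turing",
        "entity_data": {"entity_type": "person", "description": "Mathematician"},
    },
)
# Expected response shape per the handler above:
# {"status": "success", "message": "Entity 'Alan Turing' created successfully", "data": {...}}
print(resp.json())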
@@ -317,22 +321,36 @@ def create_graph_routes(rag, api_key: Optional[str] = None):
         }
         """
         try:
-            # Use the proper acreate_relation method which handles:
-            # - Graph lock for concurrency
-            # - Entity existence validation
-            # - Duplicate relation checks
-            # - Vector embedding creation in relationships_vdb
-            # - Index consistency via _edit_relation_done
-            result = await rag.acreate_relation(
-                source_entity=request.source_entity,
-                target_entity=request.target_entity,
-                relation_data=request.relation_data,
-            )
+            # Check if both entities exist
+            source_exists = await rag.chunk_entity_relation_graph.has_node(
+                request.source_entity
+            )
+            target_exists = await rag.chunk_entity_relation_graph.has_node(
+                request.target_entity
+            )
+
+            if not source_exists:
+                raise ValueError(
+                    f"Source entity '{request.source_entity}' does not exist"
+                )
+            if not target_exists:
+                raise ValueError(
+                    f"Target entity '{request.target_entity}' does not exist"
+                )
+
+            # Create the relationship
+            await rag.chunk_entity_relation_graph.upsert_edge(
+                request.source_entity, request.target_entity, request.relation_data
+            )

             return {
                 "status": "success",
                 "message": f"Relation created successfully between '{request.source_entity}' and '{request.target_entity}'",
-                "data": result,
+                "data": {
+                    "source": request.source_entity,
+                    "target": request.target_entity,
+                    **request.relation_data,
+                },
             }
         except ValueError as ve:
             logger.error(
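Likewise, a sketch of a relation-creation request against the second handler; the route path is again an assumption, while the payload fields mirror the request model used above.

# Hypothetical request; host and path are assumed, not taken from the diff.
import requests

resp = requests.post(
    "http://localhost:8000/graph/relation",  # assumed address and route
    json={
        "source_entity": "Alan Turing",
        "target_entity": "Turing machine",
        "relation_data": {"description": "proposed", "weight": 1.0},
    },
)
print(resp.json())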
112 reproduce/batch_eval.py Normal file

@@ -0,0 +1,112 @@
+import re
+import json
+import jsonlines
+
+from openai import OpenAI
+
+
+def batch_eval(query_file, result1_file, result2_file, output_file_path):
+    client = OpenAI()
+
+    with open(query_file, "r") as f:
+        data = f.read()
+
+    queries = re.findall(r"- Question \d+: (.+)", data)
+
+    with open(result1_file, "r") as f:
+        answers1 = json.load(f)
+    answers1 = [i["result"] for i in answers1]
+
+    with open(result2_file, "r") as f:
+        answers2 = json.load(f)
+    answers2 = [i["result"] for i in answers2]
+
+    requests = []
+    for i, (query, answer1, answer2) in enumerate(zip(queries, answers1, answers2)):
+        sys_prompt = """
+        ---Role---
+        You are an expert tasked with evaluating two answers to the same question based on three criteria: **Comprehensiveness**, **Diversity**, and **Empowerment**.
+        """
+
+        prompt = f"""
+        You will evaluate two answers to the same question based on three criteria: **Comprehensiveness**, **Diversity**, and **Empowerment**.
+
+        - **Comprehensiveness**: How much detail does the answer provide to cover all aspects and details of the question?
+        - **Diversity**: How varied and rich is the answer in providing different perspectives and insights on the question?
+        - **Empowerment**: How well does the answer help the reader understand and make informed judgments about the topic?
+
+        For each criterion, choose the better answer (either Answer 1 or Answer 2) and explain why. Then, select an overall winner based on these three categories.
+
+        Here is the question:
+        {query}
+
+        Here are the two answers:
+
+        **Answer 1:**
+        {answer1}
+
+        **Answer 2:**
+        {answer2}
+
+        Evaluate both answers using the three criteria listed above and provide detailed explanations for each criterion.
+
+        Output your evaluation in the following JSON format:
+
+        {{
+            "Comprehensiveness": {{
+                "Winner": "[Answer 1 or Answer 2]",
+                "Explanation": "[Provide explanation here]"
+            }},
+            "Diversity": {{
+                "Winner": "[Answer 1 or Answer 2]",
+                "Explanation": "[Provide explanation here]"
+            }},
+            "Empowerment": {{
+                "Winner": "[Answer 1 or Answer 2]",
+                "Explanation": "[Provide explanation here]"
+            }},
+            "Overall Winner": {{
+                "Winner": "[Answer 1 or Answer 2]",
+                "Explanation": "[Summarize why this answer is the overall winner based on the three criteria]"
+            }}
+        }}
+        """
+
+        request_data = {
+            "custom_id": f"request-{i + 1}",
+            "method": "POST",
+            "url": "/v1/chat/completions",
+            "body": {
+                "model": "gpt-4o-mini",
+                "messages": [
+                    {"role": "system", "content": sys_prompt},
+                    {"role": "user", "content": prompt},
+                ],
+            },
+        }
+
+        requests.append(request_data)
+
+    with jsonlines.open(output_file_path, mode="w") as writer:
+        for request in requests:
+            writer.write(request)
+
+    print(f"Batch API requests written to {output_file_path}")
+
+    batch_input_file = client.files.create(
+        file=open(output_file_path, "rb"), purpose="batch"
+    )
+    batch_input_file_id = batch_input_file.id
+
+    batch = client.batches.create(
+        input_file_id=batch_input_file_id,
+        endpoint="/v1/chat/completions",
+        completion_window="24h",
+        metadata={"description": "nightly eval job"},
+    )
+
+    print(f"Batch {batch.id} has been created.")
+
+
+if __name__ == "__main__":
+    batch_eval()
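Note that the __main__ guard above calls batch_eval() without its four required arguments, so running the file directly raises a TypeError as committed; a caller would pass concrete paths, for example (file names here are hypothetical):

batch_eval(
    query_file="queries.txt",             # text file with "- Question N: ..." lines
    result1_file="result_lightrag.json",  # JSON list of {"result": ...} objects
    result2_file="result_baseline.json",  # same shape for the second system
    output_file_path="batch_requests.jsonl",
)

The script only submits the batch job. A minimal sketch of polling for completion and downloading the evaluations afterwards, using the standard OpenAI Batch API (the batch id is the one printed by batch_eval):

from openai import OpenAI

client = OpenAI()
batch = client.batches.retrieve("batch_abc123")  # id printed by batch_eval()
if batch.status == "completed":
    # Each line of the output file is one JSON evaluation response.
    text = client.files.content(batch.output_file_id).text
    with open("batch_eval_results.jsonl", "w") as f:
        f.write(text)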