diff --git a/lightrag/api/routers/graph_routes.py b/lightrag/api/routers/graph_routes.py
index bac3e104..46e225b6 100644
--- a/lightrag/api/routers/graph_routes.py
+++ b/lightrag/api/routers/graph_routes.py
@@ -264,20 +264,24 @@ def create_graph_routes(rag, api_key: Optional[str] = None):
         }
         """
         try:
-            # Use the proper acreate_entity method which handles:
-            # - Graph lock for concurrency
-            # - Vector embedding creation in entities_vdb
-            # - Metadata population and defaults
-            # - Index consistency via _edit_entity_done
-            result = await rag.acreate_entity(
-                entity_name=request.entity_name,
-                entity_data=request.entity_data,
+            # Check if entity already exists
+            exists = await rag.chunk_entity_relation_graph.has_node(request.entity_name)
+            if exists:
+                raise ValueError(f"Entity '{request.entity_name}' already exists")
+
+            # Prepare entity data
+            entity_data = request.entity_data.copy()
+            entity_data["entity_id"] = request.entity_name
+
+            # Create the entity
+            await rag.chunk_entity_relation_graph.upsert_node(
+                request.entity_name, entity_data
             )
             return {
                 "status": "success",
                 "message": f"Entity '{request.entity_name}' created successfully",
-                "data": result,
+                "data": entity_data,
             }
         except ValueError as ve:
             logger.error(
@@ -317,22 +321,36 @@ def create_graph_routes(rag, api_key: Optional[str] = None):
         }
         """
         try:
-            # Use the proper acreate_relation method which handles:
-            # - Graph lock for concurrency
-            # - Entity existence validation
-            # - Duplicate relation checks
-            # - Vector embedding creation in relationships_vdb
-            # - Index consistency via _edit_relation_done
-            result = await rag.acreate_relation(
-                source_entity=request.source_entity,
-                target_entity=request.target_entity,
-                relation_data=request.relation_data,
+            # Check if both entities exist
+            source_exists = await rag.chunk_entity_relation_graph.has_node(
+                request.source_entity
+            )
+            target_exists = await rag.chunk_entity_relation_graph.has_node(
+                request.target_entity
+            )
+
+            if not source_exists:
+                raise ValueError(
+                    f"Source entity '{request.source_entity}' does not exist"
+                )
+            if not target_exists:
+                raise ValueError(
+                    f"Target entity '{request.target_entity}' does not exist"
+                )
+
+            # Create the relationship
+            await rag.chunk_entity_relation_graph.upsert_edge(
+                request.source_entity, request.target_entity, request.relation_data
             )
             return {
                 "status": "success",
                 "message": f"Relation created successfully between '{request.source_entity}' and '{request.target_entity}'",
-                "data": result,
+                "data": {
+                    "source": request.source_entity,
+                    "target": request.target_entity,
+                    **request.relation_data,
+                },
             }
         except ValueError as ve:
             logger.error(
diff --git a/reproduce/batch_eval.py b/reproduce/batch_eval.py
new file mode 100644
index 00000000..5a4cfc38
--- /dev/null
+++ b/reproduce/batch_eval.py
@@ -0,0 +1,112 @@
+import re
+import json
+import jsonlines
+
+from openai import OpenAI
+
+
+def batch_eval(query_file, result1_file, result2_file, output_file_path):
+    client = OpenAI()
+
+    with open(query_file, "r") as f:
+        data = f.read()
+
+    queries = re.findall(r"- Question \d+: (.+)", data)
+
+    with open(result1_file, "r") as f:
+        answers1 = json.load(f)
+    answers1 = [i["result"] for i in answers1]
+
+    with open(result2_file, "r") as f:
+        answers2 = json.load(f)
+    answers2 = [i["result"] for i in answers2]
+
+    requests = []
+    for i, (query, answer1, answer2) in enumerate(zip(queries, answers1, answers2)):
+        sys_prompt = """
+        ---Role---
+        You are an expert tasked with evaluating two answers to the same question based on three criteria: **Comprehensiveness**, **Diversity**, and **Empowerment**.
+        """
+
+        prompt = f"""
+        You will evaluate two answers to the same question based on three criteria: **Comprehensiveness**, **Diversity**, and **Empowerment**.
+
+        - **Comprehensiveness**: How much detail does the answer provide to cover all aspects and details of the question?
+        - **Diversity**: How varied and rich is the answer in providing different perspectives and insights on the question?
+        - **Empowerment**: How well does the answer help the reader understand and make informed judgments about the topic?
+
+        For each criterion, choose the better answer (either Answer 1 or Answer 2) and explain why. Then, select an overall winner based on these three categories.
+
+        Here is the question:
+        {query}
+
+        Here are the two answers:
+
+        **Answer 1:**
+        {answer1}
+
+        **Answer 2:**
+        {answer2}
+
+        Evaluate both answers using the three criteria listed above and provide detailed explanations for each criterion.
+
+        Output your evaluation in the following JSON format:
+
+        {{
+            "Comprehensiveness": {{
+                "Winner": "[Answer 1 or Answer 2]",
+                "Explanation": "[Provide explanation here]"
+            }},
+            "Diversity": {{
+                "Winner": "[Answer 1 or Answer 2]",
+                "Explanation": "[Provide explanation here]"
+            }},
+            "Empowerment": {{
+                "Winner": "[Answer 1 or Answer 2]",
+                "Explanation": "[Provide explanation here]"
+            }},
+            "Overall Winner": {{
+                "Winner": "[Answer 1 or Answer 2]",
+                "Explanation": "[Summarize why this answer is the overall winner based on the three criteria]"
+            }}
+        }}
+        """
+
+        request_data = {
+            "custom_id": f"request-{i + 1}",
+            "method": "POST",
+            "url": "/v1/chat/completions",
+            "body": {
+                "model": "gpt-4o-mini",
+                "messages": [
+                    {"role": "system", "content": sys_prompt},
+                    {"role": "user", "content": prompt},
+                ],
+            },
+        }
+
+        requests.append(request_data)
+
+    with jsonlines.open(output_file_path, mode="w") as writer:
+        for request in requests:
+            writer.write(request)
+
+    print(f"Batch API requests written to {output_file_path}")
+
+    batch_input_file = client.files.create(
+        file=open(output_file_path, "rb"), purpose="batch"
+    )
+    batch_input_file_id = batch_input_file.id
+
+    batch = client.batches.create(
+        input_file_id=batch_input_file_id,
+        endpoint="/v1/chat/completions",
+        completion_window="24h",
+        metadata={"description": "nightly eval job"},
+    )
+
+    print(f"Batch {batch.id} has been created.")
+
+
+if __name__ == "__main__":
+    # Example invocation; the file paths below are placeholders to replace
+    # with your own data. Calling batch_eval() with no arguments would raise
+    # a TypeError, since all four parameters are required.
+    batch_eval(
+        "queries.txt",
+        "result1.json",
+        "result2.json",
+        "batch_requests.jsonl",
+    )
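
Since the Batch API runs asynchronously, batch_eval only submits the job; the verdicts have to be fetched in a separate step once the batch completes. Below is a minimal sketch (not part of the patch) of how that retrieval might look, using the same OpenAI client; fetch_batch_results and the batch_results.jsonl path are illustrative names, and batch_id is the id printed by batch_eval.

import json

from openai import OpenAI


def fetch_batch_results(batch_id, result_path="batch_results.jsonl"):
    client = OpenAI()

    # A batch moves through validating/in_progress/finalizing before it
    # reaches "completed" (within the 24h completion window requested above).
    batch = client.batches.retrieve(batch_id)
    if batch.status != "completed":
        print(f"Batch {batch_id} is still '{batch.status}'; try again later.")
        return None

    # Download the output file: one JSON object per line, each carrying the
    # custom_id ("request-1", "request-2", ...) assigned in batch_eval.
    content = client.files.content(batch.output_file_id)
    with open(result_path, "w") as f:
        f.write(content.text)

    evaluations = {}
    with open(result_path, "r") as f:
        for line in f:
            item = json.loads(line)
            body = item["response"]["body"]
            evaluations[item["custom_id"]] = body["choices"][0]["message"]["content"]
    return evaluations

Because the custom_id in each output line matches the request index assigned in batch_eval, each returned evaluation can be traced back to its source question even though the Batch API does not guarantee output ordering.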