cognee/evals/src/qa/qa_benchmark_lightrag.py
Vasilije f65605b575
fix: Feature/cog 2648 evals update (#1221)
<!-- .github/pull_request_template.md -->

## Description
<!-- Provide a clear description of the changes in this PR -->

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.

---------

Co-authored-by: lxobr <122801072+lxobr@users.noreply.github.com>
Co-authored-by: Hande <159312713+hande-k@users.noreply.github.com>
2025-08-08 20:23:09 +02:00

94 lines
2.7 KiB
Python

import asyncio
import os
from dataclasses import dataclass
from typing import Any
from dotenv import load_dotenv
from lightrag import LightRAG, QueryParam
from lightrag.llm.openai import gpt_4o_mini_complete, gpt_4o_complete, openai_embed
from lightrag.kg.shared_storage import initialize_pipeline_status
from lightrag.utils import setup_logger
from .qa_benchmark_base import QABenchmarkRAG, QABenchmarkConfig
# Load variables from a .env file into the process environment at import time.
# NOTE(review): presumably this is what supplies the OpenAI credentials used by
# the lightrag.llm.openai helpers imported above — confirm.
load_dotenv()
# Configure the "lightrag" logger at INFO level.
setup_logger("lightrag", level="INFO")
@dataclass
class LightRAGConfig(QABenchmarkConfig):
    """Benchmark settings for the LightRAG backend.

    Adds the fields below on top of whatever ``QABenchmarkConfig`` declares.

    Attributes:
        working_dir: Directory handed to LightRAG as its working directory.
        query_mode: Mode string given to ``QueryParam`` for each query.
        results_file: Default name of the results file for this benchmark.
    """

    # --- storage ---
    working_dir: str = "./lightrag_storage"

    # --- querying ---
    # Accepted values noted by the original author:
    # "naive", "local", "global", "hybrid".
    query_mode: str = "hybrid"

    # --- output ---
    results_file: str = "hotpot_qa_lightrag_results.json"
class QABenchmarkLightRAG(QABenchmarkRAG):
    """LightRAG implementation of the QA benchmark.

    Supplies the backend-specific hooks (initialise, insert, query, clean up)
    using a ``LightRAG`` instance configured with the OpenAI embedding and
    completion functions imported by this module.
    """

    def __init__(self, corpus, qa_pairs, config: LightRAGConfig):
        """Store the configuration and make sure the working directory exists.

        Args:
            corpus: Forwarded unchanged to the base-class constructor.
            qa_pairs: Forwarded unchanged to the base-class constructor.
            config: LightRAG-specific benchmark configuration.

        Raises:
            OSError: If the working directory cannot be created (for example
                the path already exists but is not a directory).
        """
        super().__init__(corpus, qa_pairs, config)
        self.config: LightRAGConfig = config

        # exist_ok=True replaces a separate "exists?" check: it avoids the
        # race where another process creates the directory between the check
        # and the makedirs call.
        os.makedirs(self.config.working_dir, exist_ok=True)

    async def initialize_rag(self) -> Any:
        """Create a LightRAG instance and initialise its storages.

        Returns:
            The ``LightRAG`` object, after ``initialize_storages()`` and
            ``initialize_pipeline_status()`` have been awaited.
        """
        lightrag = LightRAG(
            working_dir=self.config.working_dir,
            embedding_func=openai_embed,
            # gpt_4o_mini_complete (also imported by this module) can be
            # substituted here to run the benchmark with that model instead.
            llm_model_func=gpt_4o_complete,
        )
        await lightrag.initialize_storages()
        await initialize_pipeline_status()
        return lightrag

    async def cleanup_rag(self) -> None:
        """Finalize the LightRAG storages if a client is set."""
        # NOTE(review): rag_client is not assigned in this class; presumably
        # the base class sets it from initialize_rag() — confirm.
        if self.rag_client:
            await self.rag_client.finalize_storages()

    async def insert_document(self, document: str, document_id: int) -> None:
        """Insert one document into LightRAG.

        Args:
            document: Text of the document to index.
            document_id: Accepted for interface compatibility; not used by
                this implementation.
        """
        # ainsert is called with a list, so wrap the single document.
        await self.rag_client.ainsert([document])

    async def query_rag(self, question: str) -> str:
        """Query LightRAG and return its answer.

        Args:
            question: The question to ask.

        Returns:
            Whatever ``aquery`` returns for the configured ``query_mode``.
        """
        result = await self.rag_client.aquery(
            question, param=QueryParam(mode=self.config.query_mode)
        )
        return result

    @property
    def system_name(self) -> str:
        """Return the name of this RAG system: ``"LightRAG"``."""
        return "LightRAG"
if __name__ == "__main__":
    # Example invocation: a deliberately small run (5 corpus entries,
    # 3 QA pairs) in hybrid mode, with results printed.
    example_config = LightRAGConfig(
        query_mode="hybrid",
        corpus_limit=5,
        qa_limit=3,
        print_results=True,
    )

    # Build the benchmark from the two JSON inputs.
    runner = QABenchmarkLightRAG.from_jsons(
        qa_pairs_file="hotpot_qa_24_qa_pairs.json",
        corpus_file="hotpot_qa_24_corpus.json",
        config=example_config,
    )

    results = runner.run()