Improve RAGAS evaluation progress tracking and clean up output handling
• Add tqdm progress bar for eval steps
• Pass progress bar to RAGAS evaluate
• Ensure progress bar cleanup in finally
• Remove redundant output buffer flushes
This commit is contained in:
parent c358f405a9
commit d36be1f499
1 changed file with 10 additions and 10 deletions
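The bullets above amount to one try/finally pattern wrapped around ragas' evaluate call. Below is a minimal sketch of that pattern, assuming a ragas release whose evaluate accepts the underscore-prefixed _pbar keyword passed in the diff; the run_single_eval wrapper and its eval_dataset, metrics, eval_llm, and eval_embeddings parameters are illustrative stand-ins, not names taken from this repository.

from tqdm import tqdm
from ragas import evaluate


def run_single_eval(idx, eval_dataset, metrics, eval_llm, eval_embeddings):
    """Illustrative stand-in for the evaluator method this commit touches."""
    pbar = None  # created lazily so the finally block can always check it
    try:
        # One tick per eval step; the diff sizes the bar at total=4
        pbar = tqdm(total=4, desc=f"Eval-{idx}", leave=True)
        return evaluate(
            dataset=eval_dataset,
            metrics=metrics,
            llm=eval_llm,
            embeddings=eval_embeddings,
            _pbar=pbar,  # hand ragas an external bar instead of letting it print its own
        )
    finally:
        # Close the bar unconditionally so stray progress output never lingers
        if pbar is not None:
            pbar.close()

Because the bar is always closed in finally, the post-run sleep and explicit stdout/stderr flushes deleted by the last hunk are no longer needed to keep terminal output tidy.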
@@ -82,6 +82,7 @@ try:
     )
     from ragas.llms import LangchainLLMWrapper
     from langchain_openai import ChatOpenAI, OpenAIEmbeddings
+    from tqdm import tqdm

     RAGAS_AVAILABLE = True

@@ -405,7 +406,11 @@ class RAGEvaluator:
         # Run RAGAS evaluation
         # IMPORTANT: Create fresh metric instances for each evaluation to avoid
         # concurrent state conflicts when multiple tasks run in parallel
+        pbar = None
         try:
+            # Create standard tqdm progress bar for RAGAS evaluation
+            pbar = tqdm(total=4, desc=f"Eval-{idx}", leave=True)
+
             eval_results = evaluate(
                 dataset=eval_dataset,
                 metrics=[
@@ -416,6 +421,7 @@ class RAGEvaluator:
                 ],
                 llm=self.eval_llm,
                 embeddings=self.eval_embeddings,
+                _pbar=pbar,
             )

             # Convert to DataFrame (RAGAS v0.3+ API)
@@ -472,6 +478,10 @@ class RAGEvaluator:
                 "ragas_score": 0,
                 "timestamp": datetime.now().isoformat(),
             }
+        finally:
+            # Force close progress bar to ensure completion
+            if pbar is not None:
+                pbar.close()

     async def evaluate_responses(self) -> List[Dict[str, Any]]:
         """
@@ -795,16 +805,6 @@ class RAGEvaluator:
         with open(json_path, "w") as f:
             json.dump(summary, f, indent=2)

-        # Add a small delay to ensure all buffered output is completely written
-        await asyncio.sleep(0.8)
-        # Flush all output buffers to ensure RAGAS progress bars are fully displayed
-        sys.stdout.flush()
-        sys.stderr.flush()
-        sys.stdout.write("\n")
-        sys.stderr.write("\n")
-        sys.stdout.flush()
-        sys.stderr.flush()
-
         # Display results table
         self._display_results_table(results)
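A side note on the unchanged comment in the second hunk about fresh metric instances: when several evaluations run concurrently, sharing metric objects can leak state between tasks. The sketch below illustrates that idea only; the two metric class names are assumptions about what ragas.metrics exports in the installed version, since the actual metric list sits in the elided part of the hunk.

# Hypothetical helper: build new metric objects for each evaluation call
from ragas.metrics import AnswerRelevancy, Faithfulness


def fresh_metrics():
    # Fresh instances per call, so concurrent evaluations never share mutable state
    return [Faithfulness(), AnswerRelevancy()]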