test(lightrag,examples): add prompt accuracy and quality tests

Add comprehensive test suites for prompt evaluation:
- test_prompt_accuracy.py: 365 lines testing prompt extraction accuracy
- test_prompt_quality_deep.py: 672 lines for deep quality analysis
- Refactor prompt.py to consolidate optimized variants (removed prompt_optimized.py)
- Apply ruff formatting and type hints across 30 files
- Update pyrightconfig.json for static type checking
- Modernize reproduce scripts and examples with improved type annotations
- Sync uv.lock dependencies
This commit is contained in:
clssck 2025-12-05 16:39:52 +01:00
parent 69358d830d
commit dd1413f3eb
30 changed files with 1430 additions and 854 deletions

View file

@ -30,7 +30,7 @@ def configure_logging():
log_file_path = os.path.abspath(os.path.join(log_dir, 'lightrag_compatible_demo.log'))
print(f'\nLightRAG compatible demo log file: {log_file_path}\n')
os.makedirs(os.path.dirname(log_dir), exist_ok=True)
os.makedirs(log_dir, exist_ok=True)
# Get log file max size and backup count from environment variables
log_max_bytes = int(os.getenv('LOG_MAX_BYTES', 10485760)) # Default 10MB
@ -123,6 +123,7 @@ async def initialize_rag():
async def main():
rag = None
try:
# Clear old data files
files_to_delete = [

View file

@ -30,7 +30,9 @@ print(f'EMBEDDING_MAX_TOKEN_SIZE: {EMBEDDING_MAX_TOKEN_SIZE}')
# LiteLLM configuration
LITELLM_URL = os.environ.get('LITELLM_URL', 'http://localhost:4000')
print(f'LITELLM_URL: {LITELLM_URL}')
LITELLM_KEY = os.environ.get('LITELLM_KEY', 'sk-4JdvGFKqSA3S0k_5p0xufw')
LITELLM_KEY = os.environ.get('LITELLM_KEY', '')
if not LITELLM_KEY:
raise ValueError('LITELLM_KEY environment variable must be set')
if not os.path.exists(WORKING_DIR):
os.mkdir(WORKING_DIR)

View file

@ -219,7 +219,9 @@ class InsertTextRequest(BaseModel):
@field_validator('file_source', mode='after')
@classmethod
def strip_source_after(cls, file_source: str) -> str:
def strip_source_after(cls, file_source: str | None) -> str | None:
if file_source is None:
return None
return file_source.strip()
class Config:
@ -252,7 +254,9 @@ class InsertTextsRequest(BaseModel):
@field_validator('file_sources', mode='after')
@classmethod
def strip_sources_after(cls, file_sources: list[str]) -> list[str]:
def strip_sources_after(cls, file_sources: list[str] | None) -> list[str] | None:
if file_sources is None:
return None
return [file_source.strip() for file_source in file_sources]
class Config:
@ -438,7 +442,7 @@ class DocStatusResponse(BaseModel):
'updated_at': '2025-03-31T12:35:30',
'track_id': 'upload_20250729_170612_abc123',
'chunks_count': 12,
'error': None,
'error_msg': None,
'metadata': {'author': 'John Doe', 'year': 2025},
'file_path': 'research_paper.pdf',
}
@ -540,7 +544,7 @@ class TrackStatusResponse(BaseModel):
'updated_at': '2025-03-31T12:35:30',
'track_id': 'upload_20250729_170612_abc123',
'chunks_count': 12,
'error': None,
'error_msg': None,
'metadata': {'author': 'John Doe', 'year': 2025},
'file_path': 'research_paper.pdf',
}
@ -1595,7 +1599,8 @@ async def pipeline_index_texts(
if not texts:
return
if file_sources is not None and len(file_sources) != 0 and len(file_sources) != len(texts):
[file_sources.append('unknown_source') for _ in range(len(file_sources), len(texts))]
for _ in range(len(file_sources), len(texts)):
file_sources.append('unknown_source')
await rag.apipeline_enqueue_documents(input=texts, file_paths=file_sources, track_id=track_id)
await rag.apipeline_process_enqueue_documents()
@ -1718,7 +1723,7 @@ async def background_delete_documents(
file_path = '#'
try:
result = await rag.adelete_by_doc_id(doc_id, delete_llm_cache=delete_llm_cache)
file_path = getattr(result, 'file_path', '-') if 'result' in locals() else '-'
file_path = getattr(result, 'file_path', '-')
if result.status == 'success':
successful_deletions.append(doc_id)
success_msg = f'Document deleted {i}/{total_docs}: {doc_id}[{file_path}]'

View file

@ -153,19 +153,22 @@ class CitationExtractor:
"""Build index mapping chunk content to reference IDs."""
self.chunk_to_ref: dict[str, str] = {}
self.ref_to_chunks: dict[str, list[dict]] = {}
self.path_to_ref: dict[str, str] = {}
# Map file_path to reference_id
path_to_ref: dict[str, str] = {}
for ref in self.references:
path_to_ref[ref.get('file_path', '')] = ref.get('reference_id', '')
path = ref.get('file_path', '')
if path:
self.path_to_ref[path] = ref.get('reference_id', '')
# Index chunks by reference
for chunk in self.chunks:
file_path = chunk.get('file_path', '')
ref_id = path_to_ref.get(file_path, '')
ref_id = self.path_to_ref.get(file_path, '')
if ref_id:
self.chunk_to_ref[chunk.get('content', '')[:100]] = ref_id
chunk_id = chunk.get('id') or chunk.get('chunk_id') or chunk.get('content', '')[:100]
self.chunk_to_ref[chunk_id] = ref_id
if ref_id not in self.ref_to_chunks:
self.ref_to_chunks[ref_id] = []
@ -233,12 +236,7 @@ class CitationExtractor:
if final_score >= self.min_similarity:
file_path = chunk.get('file_path', '')
# Find reference_id for this chunk
ref_id = None
for ref in self.references:
if ref.get('file_path') == file_path:
ref_id = ref.get('reference_id')
break
ref_id = self.path_to_ref.get(file_path)
if ref_id:
matches.append(

View file

@ -13,10 +13,12 @@ Note: RAGEvaluator is imported lazily to avoid import errors
when ragas/datasets are not installed.
"""
from typing import Any
__all__ = ['RAGEvaluator']
def __getattr__(name):
def __getattr__(name: str) -> Any:
"""Lazy import to avoid dependency errors when ragas is not installed."""
if name == 'RAGEvaluator':
from .eval_rag_quality import RAGEvaluator

View file

@ -1,3 +1,4 @@
import asyncio
import configparser
import logging
import os
@ -60,8 +61,8 @@ redis_retry = retry(
class RedisConnectionManager:
"""Shared Redis connection pool manager to avoid creating multiple pools for the same Redis URI"""
_pools: ClassVar[dict] = {}
_pool_refs: ClassVar[dict] = {} # Track reference count for each pool
_pools: ClassVar[dict[str, ConnectionPool]] = {}
_pool_refs: ClassVar[dict[str, int]] = {} # Track reference count for each pool
_lock: ClassVar[threading.Lock] = threading.Lock()
@classmethod
@ -86,8 +87,8 @@ class RedisConnectionManager:
return cls._pools[redis_url]
@classmethod
def release_pool(cls, redis_url: str):
"""Release a reference to the connection pool"""
async def release_pool_async(cls, redis_url: str):
"""Release a reference to the connection pool (async to await disconnect)."""
with cls._lock:
if redis_url in cls._pool_refs:
cls._pool_refs[redis_url] -= 1
@ -95,14 +96,26 @@ class RedisConnectionManager:
# If no more references, close the pool
if cls._pool_refs[redis_url] <= 0:
pool = cls._pools.get(redis_url)
try:
cls._pools[redis_url].disconnect()
logger.info(f'Closed Redis connection pool for {redis_url} (no more references)')
if pool:
await pool.disconnect()
logger.info(f'Closed Redis connection pool for {redis_url} (no more references)')
except Exception as e:
logger.error(f'Error closing Redis pool for {redis_url}: {e}')
finally:
del cls._pools[redis_url]
del cls._pool_refs[redis_url]
cls._pools.pop(redis_url, None)
cls._pool_refs.pop(redis_url, None)
@classmethod
def release_pool(cls, redis_url: str):
"""Sync-friendly wrapper that schedules async pool release."""
try:
loop = asyncio.get_running_loop()
except RuntimeError:
asyncio.run(cls.release_pool_async(redis_url))
return
loop.create_task(cls.release_pool_async(redis_url))
@classmethod
def close_all_pools(cls):
@ -225,7 +238,7 @@ class RedisKVStorage(BaseKVStorage):
# Release the pool reference (will auto-close pool if no more references)
if hasattr(self, '_redis_url') and self._redis_url:
RedisConnectionManager.release_pool(self._redis_url)
await RedisConnectionManager.release_pool_async(self._redis_url)
self._pool = None
logger.debug(f'[{self.workspace}] Released Redis connection pool reference for {self.namespace}')
@ -506,7 +519,7 @@ class RedisDocStatusStorage(DocStatusStorage):
else:
# When workspace is empty, final_namespace equals original namespace
self.final_namespace = self.namespace
self.workspace = '_'
self.workspace = ''
logger.debug(f"[{self.workspace}] Final namespace (no workspace): '{self.namespace}'")
self._redis_url = os.environ.get('REDIS_URI', config.get('redis', 'uri', fallback='redis://localhost:6379'))
@ -577,7 +590,7 @@ class RedisDocStatusStorage(DocStatusStorage):
# Release the pool reference (will auto-close pool if no more references)
if hasattr(self, '_redis_url') and self._redis_url:
RedisConnectionManager.release_pool(self._redis_url)
await RedisConnectionManager.release_pool_async(self._redis_url)
self._pool = None
logger.debug(f'[{self.workspace}] Released Redis connection pool reference for doc status {self.namespace}')

View file

@ -154,9 +154,9 @@ async def bedrock_complete_if_cache(
if history_messages is None:
history_messages = []
if enable_cot:
import logging
from lightrag.utils import logger
logging.debug('enable_cot=True is not supported for Bedrock and will be ignored.')
logger.debug('enable_cot=True is not supported for Bedrock and will be ignored.')
# Respect existing env; only set if a non-empty value is available
access_key = os.environ.get('AWS_ACCESS_KEY_ID') or aws_access_key_id
secret_key = os.environ.get('AWS_SECRET_ACCESS_KEY') or aws_secret_access_key
@ -327,7 +327,10 @@ async def bedrock_complete(
if history_messages is None:
history_messages = []
kwargs.pop('keyword_extraction', None)
model_name = kwargs['hashing_kv'].global_config['llm_model_name']
hashing_kv = kwargs.get('hashing_kv')
if not hashing_kv:
raise ValueError("'hashing_kv' parameter is required")
model_name = hashing_kv.global_config['llm_model_name']
result = await bedrock_complete_if_cache(
model_name,
prompt,

View file

@ -285,7 +285,7 @@ class BindingOptions:
sample_stream.write(f'# {arg_item["help"]}\n')
# Handle JSON formatting for list and dict types
if arg_item['type'] is list[str] or arg_item['type'] is dict:
if arg_item['type'] == list[str] or arg_item['type'] == dict:
default_value = json.dumps(arg_item['default'])
else:
default_value = arg_item['default']
@ -527,7 +527,7 @@ class OpenAILLMOptions(BindingOptions):
temperature: float = DEFAULT_TEMPERATURE # Controls randomness (0.0 to 2.0)
top_p: float = 1.0 # Nucleus sampling parameter (0.0 to 1.0)
max_tokens: int | None = None # Maximum number of tokens to generate (deprecated, use max_completion_tokens instead)
extra_body: dict | None = None # Extra body parameters for OpenRouter or vLLM
extra_body: dict[str, Any] | None = None # Extra body parameters for OpenRouter or vLLM
# Help descriptions
_help: ClassVar[dict[str, str]] = {

View file

@ -1,4 +1,3 @@
pass
import pipmaster as pm # Pipmaster for dynamic library install
# install specific modules
@ -10,6 +9,7 @@ import struct
import aiohttp
import numpy as np
from lightrag.utils import logger
from openai import (
APIConnectionError,
APITimeoutError,
@ -34,7 +34,9 @@ async def siliconcloud_embedding(
base_url: str = 'https://api.siliconflow.cn/v1/embeddings',
max_token_size: int = 8192,
api_key: str | None = None,
encoding_format: str = 'base64',
) -> np.ndarray:
logger.debug(f'siliconcloud_embedding called with {len(texts)} texts, model={model}, encoding={encoding_format}')
if api_key and not api_key.startswith('Bearer '):
api_key = 'Bearer ' + api_key
@ -42,22 +44,32 @@ async def siliconcloud_embedding(
truncate_texts = [text[0:max_token_size] for text in texts]
payload = {'model': model, 'input': truncate_texts, 'encoding_format': 'base64'}
payload = {'model': model, 'input': truncate_texts, 'encoding_format': encoding_format}
base64_strings = []
async with (
aiohttp.ClientSession() as session,
session.post(base_url, headers=headers, json=payload) as response,
):
content = await response.json()
try:
content = await response.json()
except Exception as exc:
logger.error(f'Failed to parse siliconcloud response: {exc}')
raise
if 'code' in content:
logger.error(f'API error response: {content}')
raise ValueError(content)
base64_strings = [item['embedding'] for item in content['data']]
embeddings = []
for string in base64_strings:
decode_bytes = base64.b64decode(string)
n = len(decode_bytes) // 4
float_array = struct.unpack('<' + 'f' * n, decode_bytes)
embeddings.append(float_array)
return np.array(embeddings)
if encoding_format == 'base64':
base64_strings = [item['embedding'] for item in content['data']]
embeddings = []
for string in base64_strings:
decode_bytes = base64.b64decode(string)
n = len(decode_bytes) // 4
float_array = struct.unpack('<' + 'f' * n, decode_bytes)
embeddings.append(float_array)
logger.debug(f'Decoded {len(embeddings)} embeddings from base64')
return np.array(embeddings)
embeddings = np.array([item['embedding'] for item in content['data']])
logger.debug(f'Returned {len(embeddings)} embeddings (raw format)')
return embeddings

View file

@ -10,6 +10,7 @@ implementation mirrors the OpenAI helpers while relying on the official
from __future__ import annotations
import asyncio
import contextlib
import logging
import os
from collections.abc import AsyncIterator
@ -38,8 +39,6 @@ if not pm.is_installed('google-genai'):
if not pm.is_installed('google-api-core'):
pm.install('google-api-core')
import contextlib
from google import genai # type: ignore
from google.api_core import exceptions as google_api_exceptions # type: ignore
from google.genai import types # type: ignore

View file

@ -93,9 +93,10 @@ async def hf_model_if_cache(
+ '>\n'
)
input_ids = hf_tokenizer(input_prompt, return_tensors='pt', padding=True, truncation=True).to('cuda')
inputs = {k: v.to(hf_model.device) for k, v in input_ids.items()}
output = hf_model.generate(**input_ids, max_new_tokens=512, num_return_sequences=1, early_stopping=True)
device = hf_model.device
tokenized = hf_tokenizer(input_prompt, return_tensors='pt', padding=True, truncation=True).to(device)
inputs = {k: v.to(device) for k, v in tokenized.items()}
output = hf_model.generate(**inputs, max_new_tokens=512, num_return_sequences=1, early_stopping=True)
response_text = hf_tokenizer.decode(output[0][len(inputs['input_ids'][0]) :], skip_special_tokens=True)
return response_text

View file

@ -99,16 +99,14 @@ async def jina_embed(
aiohttp.ClientError: If there is a connection error with the Jina API.
aiohttp.ClientResponseError: If the Jina API returns an error response.
"""
if api_key:
os.environ['JINA_API_KEY'] = api_key
if 'JINA_API_KEY' not in os.environ:
effective_api_key = api_key or os.environ.get('JINA_API_KEY')
if not effective_api_key:
raise ValueError('JINA_API_KEY environment variable is required')
url = base_url or 'https://api.jina.ai/v1/embeddings'
headers = {
'Content-Type': 'application/json',
'Authorization': f'Bearer {os.environ["JINA_API_KEY"]}',
'Authorization': f'Bearer {effective_api_key}',
}
data = {
'model': model,

View file

@ -1,3 +1,4 @@
import asyncio
from collections.abc import AsyncIterator
import pipmaster as pm # Pipmaster for dynamic library install
@ -72,15 +73,16 @@ async def lollms_model_if_cache(
}
# Prepare the full prompt including history
full_prompt = ''
prompt_parts = []
if system_prompt:
full_prompt += f'{system_prompt}\n'
prompt_parts.append(f'{system_prompt}\n')
for msg in history_messages:
full_prompt += f'{msg["role"]}: {msg["content"]}\n'
full_prompt += prompt
prompt_parts.append(f'{msg["role"]}: {msg["content"]}\n')
prompt_parts.append(prompt)
full_prompt = ''.join(prompt_parts)
request_data['prompt'] = full_prompt
timeout = aiohttp.ClientTimeout(total=kwargs.get('timeout'))
timeout = aiohttp.ClientTimeout(total=kwargs.get('timeout', 300))
async with aiohttp.ClientSession(timeout=timeout, headers=headers) as session:
if stream:
@ -106,13 +108,14 @@ async def lollms_model_complete(
) -> str | AsyncIterator[str]:
"""Complete function for lollms model generation."""
# Extract and remove keyword_extraction from kwargs if present
if history_messages is None:
history_messages = []
keyword_extraction = kwargs.pop('keyword_extraction', None)
# Get model name from config
model_name = kwargs['hashing_kv'].global_config['llm_model_name']
try:
model_name = kwargs['hashing_kv'].global_config['llm_model_name']
except (KeyError, AttributeError) as exc:
raise ValueError('Missing required configuration: hashing_kv.global_config.llm_model_name') from exc
# If keyword extraction is needed, we might need to modify the prompt
# or add specific parameters for JSON output (if lollms supports it)
@ -146,20 +149,19 @@ async def lollms_embed(texts: list[str], embed_model=None, base_url='http://loca
"""
api_key = kwargs.pop('api_key', None)
headers = (
{'Content-Type': 'application/json', 'Authorization': api_key}
{'Content-Type': 'application/json', 'Authorization': f'Bearer {api_key}'}
if api_key
else {'Content-Type': 'application/json'}
)
async with aiohttp.ClientSession(headers=headers) as session:
embeddings = []
for text in texts:
async def fetch_embedding(text: str):
request_data = {'text': text}
async with session.post(
f'{base_url}/lollms_embed',
json=request_data,
) as response:
async with session.post(f'{base_url}/lollms_embed', json=request_data) as response:
result = await response.json()
embeddings.append(result['vector'])
if 'vector' not in result:
raise ValueError(f'Unexpected embedding response format: {result}')
return result['vector']
embeddings = await asyncio.gather(*[fetch_embedding(text) for text in texts])
return np.array(embeddings)

View file

@ -1,7 +1,5 @@
import os
pass
import pipmaster as pm # Pipmaster for dynamic library install
# install specific modules

View file

@ -3,7 +3,6 @@ import re
from lightrag.utils import verbose_debug
pass
import pipmaster as pm # Pipmaster for dynamic library install
# install specific modules
@ -95,8 +94,8 @@ async def zhipu_complete(
):
if history_messages is None:
history_messages = []
# Pop keyword_extraction from kwargs to avoid passing it to zhipu_complete_if_cache
keyword_extraction = kwargs.pop('keyword_extraction', None)
# Remove keyword_extraction from kwargs if it was passed redundantly
kwargs.pop('keyword_extraction', None)
if keyword_extraction:
# Add a system prompt to guide the model to return JSON format

View file

@ -86,7 +86,9 @@ from lightrag.utils import (
# Query embedding cache configuration (configurable via environment variables)
QUERY_EMBEDDING_CACHE_TTL = int(os.getenv('QUERY_EMBEDDING_CACHE_TTL', '3600')) # 1 hour
QUERY_EMBEDDING_CACHE_MAX_SIZE = int(os.getenv('QUERY_EMBEDDING_CACHE_SIZE', '10000'))
QUERY_EMBEDDING_CACHE_MAX_SIZE = int(
os.getenv('QUERY_EMBEDDING_CACHE_MAX_SIZE', os.getenv('QUERY_EMBEDDING_CACHE_SIZE', '10000'))
)
# Redis cache configuration
REDIS_EMBEDDING_CACHE_ENABLED = os.getenv('REDIS_EMBEDDING_CACHE', 'false').lower() == 'true'
@ -95,6 +97,7 @@ REDIS_URI = os.getenv('REDIS_URI', 'redis://localhost:6379')
# Local in-memory cache with LRU eviction
# Structure: {query_hash: (embedding, timestamp)}
_query_embedding_cache: dict[str, tuple[list[float], float]] = {}
_query_embedding_cache_lock = asyncio.Lock()
# Global Redis client (lazy initialized)
_redis_client = None
@ -164,14 +167,15 @@ async def get_cached_query_embedding(query: str, embedding_func) -> list[float]
embedding_result = embedding[0] # Extract first from batch
# Manage local cache size - LRU eviction of oldest entries
if len(_query_embedding_cache) >= QUERY_EMBEDDING_CACHE_MAX_SIZE:
# Remove oldest 10% of entries
sorted_entries = sorted(_query_embedding_cache.items(), key=lambda x: x[1][1])
for old_key, _ in sorted_entries[: QUERY_EMBEDDING_CACHE_MAX_SIZE // 10]:
del _query_embedding_cache[old_key]
async with _query_embedding_cache_lock:
if len(_query_embedding_cache) >= QUERY_EMBEDDING_CACHE_MAX_SIZE:
# Remove oldest 10% of entries
sorted_entries = sorted(_query_embedding_cache.items(), key=lambda x: x[1][1])
for old_key, _ in sorted_entries[: QUERY_EMBEDDING_CACHE_MAX_SIZE // 10]:
del _query_embedding_cache[old_key]
# Store in local cache
_query_embedding_cache[query_hash] = (embedding_result, current_time)
# Store in local cache
_query_embedding_cache[query_hash] = (embedding_result, current_time)
# Store in Redis (if enabled)
if REDIS_EMBEDDING_CACHE_ENABLED:
@ -333,7 +337,7 @@ async def _handle_entity_relation_summary(
return final_description if final_description else '', llm_was_used
else:
if total_tokens > summary_context_size and len(current_list) <= 2:
logger.warning(f'Summarizing {entity_or_relation_name}: Oversize descpriton found')
logger.warning(f'Summarizing {entity_or_relation_name}: Oversize description found')
# Final summarization of remaining descriptions - LLM will be used
final_summary = await _summarize_descriptions(
description_type,
@ -361,7 +365,7 @@ async def _handle_entity_relation_summary(
# Force add one more description to ensure minimum 2 per chunk
current_chunk.append(desc)
chunks.append(current_chunk)
logger.warning(f'Summarizing {entity_or_relation_name}: Oversize descpriton found')
logger.warning(f'Summarizing {entity_or_relation_name}: Oversize description found')
current_chunk = [] # next group is empty
current_tokens = 0
else: # current_chunk is ready for summary in reduce phase
@ -499,7 +503,7 @@ async def _handle_single_entity_extraction(
if len(record_attributes) != 4 or 'entity' not in record_attributes[0]:
if len(record_attributes) > 1 and 'entity' in record_attributes[0]:
logger.warning(
f'{chunk_key}: LLM output format error; found {len(record_attributes)}/4 feilds on ENTITY `{record_attributes[1]}` @ `{record_attributes[2] if len(record_attributes) > 2 else "N/A"}`'
f'{chunk_key}: LLM output format error; found {len(record_attributes)}/4 fields on ENTITY `{record_attributes[1]}` @ `{record_attributes[2] if len(record_attributes) > 2 else "N/A"}`'
)
logger.debug(record_attributes)
return None
@ -559,7 +563,7 @@ async def _handle_single_relationship_extraction(
): # treat "relationship" and "relation" interchangeable
if len(record_attributes) > 1 and 'relation' in record_attributes[0]:
logger.warning(
f'{chunk_key}: LLM output format error; found {len(record_attributes)}/5 fields on REALTION `{record_attributes[1]}`~`{record_attributes[2] if len(record_attributes) > 2 else "N/A"}`'
f'{chunk_key}: LLM output format error; found {len(record_attributes)}/5 fields on RELATION `{record_attributes[1]}`~`{record_attributes[2] if len(record_attributes) > 2 else "N/A"}`'
)
logger.debug(record_attributes)
return None
@ -2687,14 +2691,12 @@ async def merge_nodes_and_edges(
file_path: File path for logging
"""
if full_entities_storage is None or full_relations_storage is None:
raise ValueError('full_entities_storage and full_relations_storage are required for merge operations')
if pipeline_status is None:
pipeline_status = {}
if pipeline_status_lock is None:
pipeline_status_lock = asyncio.Lock()
if full_entities_storage is None or full_relations_storage is None:
raise ValueError('full_entities_storage and full_relations_storage are required for merge operations')
assert full_entities_storage is not None
assert full_relations_storage is not None
# Check for cancellation at the start of merge
if pipeline_status is not None and pipeline_status_lock is not None:
@ -3405,8 +3407,6 @@ async def kg_query(
# Apply higher priority (5) to query relation LLM function
use_model_func = partial(use_model_func, _priority=5)
llm_callable = cast(Callable[..., Awaitable[str | AsyncIterator[str]]], use_model_func)
llm_callable = cast(Callable[..., Awaitable[str | AsyncIterator[str]]], use_model_func)
llm_callable = cast(Callable[..., Awaitable[str | AsyncIterator[str]]], use_model_func)
hl_keywords, ll_keywords = await get_keywords_from_query(query, query_param, global_config, hashing_kv)
@ -4081,9 +4081,6 @@ async def _merge_all_chunks(
raise ValueError('query_param is required for merging chunks')
if knowledge_graph_inst is None or chunks_vdb is None:
raise ValueError('knowledge_graph_inst and chunks_vdb are required for chunk merging')
assert query_param is not None
assert knowledge_graph_inst is not None
assert chunks_vdb is not None
if chunk_tracking is None:
chunk_tracking = {}

View file

@ -9,70 +9,42 @@ PROMPTS['DEFAULT_TUPLE_DELIMITER'] = '<|#|>'
PROMPTS['DEFAULT_COMPLETION_DELIMITER'] = '<|COMPLETE|>'
PROMPTS['entity_extraction_system_prompt'] = """---Role---
You are a Knowledge Graph Specialist responsible for extracting entities and relationships from the input text.
You are a Knowledge Graph Specialist extracting entities and relationships from text.
---Instructions---
1. **Entity Extraction & Output:**
* **Identification:** Identify clearly defined and meaningful entities in the input text.
* **Entity Details:** For each identified entity, extract the following information:
* `entity_name`: The name of the entity. If the entity name is case-insensitive, capitalize the first letter of each significant word (title case). Ensure **consistent naming** across the entire extraction process.
* `entity_type`: Categorize the entity using one of the following types: `{entity_types}`. If none of the provided types apply, do not invent a new type; classify it as `Other`.
* `entity_description`: Provide a concise yet comprehensive description of the entity's attributes and activities, based *solely* on the information present in the input text.
* **Output Format - Entities:** Output a total of 4 fields for each entity, delimited by `{tuple_delimiter}`, on a single line. The first field *must* be the literal string `entity`.
* Format: `entity{tuple_delimiter}entity_name{tuple_delimiter}entity_type{tuple_delimiter}entity_description`
---Output Format---
Output raw lines only—NO markdown, NO headers, NO backticks.
2. **Relationship Extraction & Output:**
* **Identification:** Identify meaningful relationships between previously extracted entities. Include:
* **Direct relationships:** Explicitly stated interactions, actions, or connections.
* **Comparative relationships:** When entities are explicitly grouped, ranked, or compared (e.g., 'Brazil has 5 wins, Germany has 4' establishes a comparison).
* **Hierarchical relationships:** Clear part-of, member-of, or type-of connections.
* **Causal relationships:** Explicit cause-effect connections.
* **Categorical relationships:** When entities share explicit group membership (e.g., 'World Cup winners include Brazil, Germany, and Italy').
* **AVOID** purely speculative or inferred connections not supported by text structure.
* **N-ary Relationship Decomposition:** If a single statement describes a relationship involving more than two entities (an N-ary relationship), decompose it into multiple binary (two-entity) relationship pairs for separate description.
* **Example:** For "Alice, Bob, and Carol collaborated on Project X," extract binary relationships such as "Alice collaborated with Project X," "Bob collaborated with Project X," and "Carol collaborated with Project X," or "Alice collaborated with Bob," based on the most reasonable binary interpretations.
* **Relationship Details:** For each binary relationship, extract the following fields:
* `source_entity`: The name of the source entity. Ensure **consistent naming** with entity extraction. Capitalize the first letter of each significant word (title case) if the name is case-insensitive.
* `target_entity`: The name of the target entity. Ensure **consistent naming** with entity extraction. Capitalize the first letter of each significant word (title case) if the name is case-insensitive.
* `relationship_keywords`: One or more high-level keywords summarizing the overarching nature, concepts, or themes of the relationship. Multiple keywords within this field must be separated by a comma `,`. **DO NOT use `{tuple_delimiter}` for separating multiple keywords within this field.**
* `relationship_description`: A concise explanation of the nature of the relationship between the source and target entities, providing a clear rationale for their connection.
* **Output Format - Relationships:** Output a total of 5 fields for each relationship, delimited by `{tuple_delimiter}`, on a single line. The first field *must* be the literal string `relation`.
* Format: `relation{tuple_delimiter}source_entity{tuple_delimiter}target_entity{tuple_delimiter}relationship_keywords{tuple_delimiter}relationship_description`
* **Relationship Quality Requirement:** Extract relationships that are supported by text structure. Balance precision with graph connectivity:
* Primary: Direct, causal, and hierarchical relationships with explicit textual support.
* Secondary: Comparative and categorical relationships when entities are grouped or ranked together.
* An orphan entity is acceptable only if the text truly provides no structural grouping.
* Do NOT invent speculative connections, but DO capture explicit groupings and comparisons.
* **Attribution Verification:** When extracting relationships, ensure the source and target entities are correctly identified from the text. Do not conflate similar entities or transfer attributes from one entity to another.
Entity: entity{tuple_delimiter}name{tuple_delimiter}type{tuple_delimiter}description
Relation: relation{tuple_delimiter}source{tuple_delimiter}target{tuple_delimiter}keywords{tuple_delimiter}description
3. **Delimiter Usage Protocol:**
* The `{tuple_delimiter}` is a complete, atomic marker and **must not be filled with content**. It serves strictly as a field separator.
* **Incorrect Example:** `entity{tuple_delimiter}Tokyo<|location|>Tokyo is the capital of Japan.`
* **Correct Example:** `entity{tuple_delimiter}Tokyo{tuple_delimiter}location{tuple_delimiter}Tokyo is the capital of Japan.`
Use Title Case for names. Separate keywords with commas. Output entities first, then relations. End with {completion_delimiter}.
4. **Relationship Direction & Duplication:**
* Treat all relationships as **undirected** unless explicitly stated otherwise. Swapping the source and target entities for an undirected relationship does not constitute a new relationship.
* Avoid outputting duplicate relationships.
---Entity Extraction---
Extract BOTH concrete and abstract entities:
- **Concrete:** Named people, organizations, places, products, dates
- **Abstract:** Concepts, events, categories, processes mentioned in text (e.g., "market selloff", "merger", "pandemic")
5. **Output Order & Prioritization:**
* Output all extracted entities first, followed by all extracted relationships.
* Within the list of relationships, prioritize and output those relationships that are **most significant** to the core meaning of the input text first.
Types: `{entity_types}` (use `Other` if none fit)
6. **Context & Objectivity:**
* Ensure all entity names and descriptions are written in the **third person**.
* Explicitly name the subject or object; **avoid using pronouns** such as `this article`, `this paper`, `our company`, `I`, `you`, and `he/she`.
---Relationship Extraction---
Extract meaningful relationships:
- **Direct:** explicit interactions, actions, connections
- **Categorical:** entities sharing group membership or classification
- **Causal:** cause-effect relationships
- **Hierarchical:** part-of, member-of, type-of
7. **Language & Proper Nouns:**
* The entire output (entity names, keywords, and descriptions) must be written in `{language}`.
* Proper nouns (e.g., personal names, place names, organization names) should be retained in their original language if a proper, widely accepted translation is not available or would cause ambiguity.
Create intermediate concept entities when they help connect related items (e.g., "Vaccines" connecting Pfizer/Moderna/AstraZeneca).
8. **Completion Signal:** Output the literal string `{completion_delimiter}` only after all entities and relationships, following all criteria, have been completely extracted and outputted.
For N-ary relationships, decompose into binary pairs. Avoid duplicates.
---Guidelines---
- Third person only; no pronouns like "this article", "I", "you"
- Output in `{language}`. Keep proper nouns in original language.
---Examples---
{examples}
---Real Data to be Processed---
<Input>
---Input---
Entity_types: [{entity_types}]
Text:
```
@ -81,162 +53,108 @@ Text:
"""
PROMPTS['entity_extraction_user_prompt'] = """---Task---
Extract entities and relationships from the input text to be processed.
Extract entities and relationships from the text. Include both concrete entities AND abstract concepts/events.
---Instructions---
1. **Strict Adherence to Format:** Strictly adhere to all format requirements for entity and relationship lists, including output order, field delimiters, and proper noun handling, as specified in the system prompt.
2. **Output Content Only:** Output *only* the extracted list of entities and relationships. Do not include any introductory or concluding remarks, explanations, or additional text before or after the list.
3. **Completion Signal:** Output `{completion_delimiter}` as the final line after all relevant entities and relationships have been extracted and presented.
4. **Output Language:** Ensure the output language is {language}. Proper nouns (e.g., personal names, place names, organization names) must be kept in their original language and not translated.
Follow format exactly. Output only extractions—no explanations. End with `{completion_delimiter}`.
Output in {language}; keep proper nouns in original language.
<Output>
"""
PROMPTS['entity_continue_extraction_user_prompt'] = """---Task---
Based on the last extraction task, identify and extract any **missed or incorrectly formatted** entities and relationships from the input text. Pay special attention to **orphan entities** (entities with no relationships).
Review extraction for missed entities/relationships.
---Instructions---
1. **Strict Adherence to System Format:** Strictly adhere to all format requirements for entity and relationship lists, including output order, field delimiters, and proper noun handling, as specified in the system instructions.
2. **Graph Connectivity Check:**
* Review extracted entities. For any without relationships, check if they appear in:
- Groupings or lists (categorical relationship candidates)
- Comparisons or rankings (comparative relationship candidates)
- Shared contexts with other entities (domain relationship candidates)
* Add these relationships if supported by text structure.
* An isolated entity is acceptable only if truly unconnected in the text.
3. **Focus on Corrections/Additions:**
* **Do NOT** re-output entities and relationships that were **correctly and fully** extracted in the last task.
* If an entity or relationship was **missed** in the last task, extract and output it now according to the system format.
* If an entity or relationship was **truncated, had missing fields, or was otherwise incorrectly formatted** in the last task, re-output the *corrected and complete* version in the specified format.
4. **Output Format - Entities:** Output a total of 4 fields for each entity, delimited by `{tuple_delimiter}`, on a single line. The first field *must* be the literal string `entity`.
5. **Output Format - Relationships:** Output a total of 5 fields for each relationship, delimited by `{tuple_delimiter}`, on a single line. The first field *must* be the literal string `relation`.
6. **Output Content Only:** Output *only* the extracted list of entities and relationships. Do not include any introductory or concluding remarks, explanations, or additional text before or after the list.
7. **Completion Signal:** Output `{completion_delimiter}` as the final line after all relevant missing or corrected entities and relationships have been extracted and presented.
8. **Output Language:** Ensure the output language is {language}. Proper nouns (e.g., personal names, place names, organization names) must be kept in their original language and not translated.
Check for:
1. Abstract concepts that could serve as hubs (events, categories, processes)
2. Orphan entities that need connections
3. Formatting errors
Only output NEW or CORRECTED items. End with `{completion_delimiter}`. Output in {language}.
<Output>
"""
PROMPTS['entity_extraction_examples'] = [
# Example 1: Shows abstract concept extraction (Market Selloff as hub)
"""<Input Text>
```
while Alex clenched his jaw, the buzz of frustration dull against the backdrop of Taylor's authoritarian certainty. It was this competitive undercurrent that kept him alert, the sense that his and Jordan's shared commitment to discovery was an unspoken rebellion against Cruz's narrowing vision of control and order.
Stock markets faced a sharp downturn as tech giants saw significant declines, with the global tech index dropping 3.4%. Nexon Technologies saw its stock plummet 7.8% after lower-than-expected earnings. In contrast, Omega Energy posted a 2.1% gain driven by rising oil prices.
Then Taylor did something unexpected. They paused beside Jordan and, for a moment, observed the device with something akin to reverence. "If this tech can be understood..." Taylor said, their voice quieter, "It could change the game for us. For all of us."
The underlying dismissal earlier seemed to falter, replaced by a glimpse of reluctant respect for the gravity of what lay in their hands. Jordan looked up, and for a fleeting heartbeat, their eyes locked with Taylor's, a wordless clash of wills softening into an uneasy truce.
It was a small transformation, barely perceptible, but one that Alex noted with an inward nod. They had all been brought here by different paths
Gold futures rose 1.5% to $2,080/oz as investors sought safe-haven assets. The Federal Reserve's upcoming policy announcement is expected to influence market stability.
```
<Output>
entity{tuple_delimiter}Alex{tuple_delimiter}person{tuple_delimiter}Alex is a character who experiences frustration and is observant of the dynamics among other characters.
entity{tuple_delimiter}Taylor{tuple_delimiter}person{tuple_delimiter}Taylor is portrayed with authoritarian certainty and shows a moment of reverence towards a device, indicating a change in perspective.
entity{tuple_delimiter}Jordan{tuple_delimiter}person{tuple_delimiter}Jordan shares a commitment to discovery and has a significant interaction with Taylor regarding a device.
entity{tuple_delimiter}Cruz{tuple_delimiter}person{tuple_delimiter}Cruz is associated with a vision of control and order, influencing the dynamics among other characters.
entity{tuple_delimiter}The Device{tuple_delimiter}equipment{tuple_delimiter}The Device is central to the story, with potential game-changing implications, and is revered by Taylor.
relation{tuple_delimiter}Alex{tuple_delimiter}Taylor{tuple_delimiter}power dynamics, observation{tuple_delimiter}Alex observes Taylor's authoritarian behavior and notes changes in Taylor's attitude toward the device.
relation{tuple_delimiter}Alex{tuple_delimiter}Jordan{tuple_delimiter}shared goals, rebellion{tuple_delimiter}Alex and Jordan share a commitment to discovery, which contrasts with Cruz's vision.
relation{tuple_delimiter}Taylor{tuple_delimiter}Jordan{tuple_delimiter}conflict resolution, mutual respect{tuple_delimiter}Taylor and Jordan interact directly regarding the device, leading to a moment of mutual respect and an uneasy truce.
relation{tuple_delimiter}Jordan{tuple_delimiter}Cruz{tuple_delimiter}ideological conflict, rebellion{tuple_delimiter}Jordan's commitment to discovery is in rebellion against Cruz's vision of control and order.
relation{tuple_delimiter}Taylor{tuple_delimiter}The Device{tuple_delimiter}reverence, technological significance{tuple_delimiter}Taylor shows reverence towards the device, indicating its importance and potential impact.
entity{tuple_delimiter}Market Selloff{tuple_delimiter}event{tuple_delimiter}Significant decline in stock values due to investor concerns.
entity{tuple_delimiter}Global Tech Index{tuple_delimiter}category{tuple_delimiter}Tracks major tech stocks; dropped 3.4% today.
entity{tuple_delimiter}Nexon Technologies{tuple_delimiter}organization{tuple_delimiter}Tech company whose stock fell 7.8% after disappointing earnings.
entity{tuple_delimiter}Omega Energy{tuple_delimiter}organization{tuple_delimiter}Energy company that gained 2.1% due to rising oil prices.
entity{tuple_delimiter}Gold Futures{tuple_delimiter}product{tuple_delimiter}Rose 1.5% to $2,080/oz as safe-haven investment.
entity{tuple_delimiter}Federal Reserve{tuple_delimiter}organization{tuple_delimiter}Central bank whose policy may impact markets.
relation{tuple_delimiter}Global Tech Index{tuple_delimiter}Market Selloff{tuple_delimiter}market decline{tuple_delimiter}Tech index drop is part of broader selloff.
relation{tuple_delimiter}Nexon Technologies{tuple_delimiter}Market Selloff{tuple_delimiter}tech decline{tuple_delimiter}Nexon among hardest hit in selloff.
relation{tuple_delimiter}Omega Energy{tuple_delimiter}Market Selloff{tuple_delimiter}contrast, resilience{tuple_delimiter}Omega gained while broader market sold off.
relation{tuple_delimiter}Gold Futures{tuple_delimiter}Market Selloff{tuple_delimiter}safe-haven{tuple_delimiter}Gold rose as investors fled stocks.
relation{tuple_delimiter}Federal Reserve{tuple_delimiter}Market Selloff{tuple_delimiter}policy impact{tuple_delimiter}Fed policy expectations contributed to volatility.
{completion_delimiter}
""",
# Example 2: Shows intermediate entity (Vaccines) connecting multiple orgs
"""<Input Text>
```
Stock markets faced a sharp downturn today as tech giants saw significant declines, with the global tech index dropping by 3.4% in midday trading. Analysts attribute the selloff to investor concerns over rising interest rates and regulatory uncertainty.
Among the hardest hit, nexon technologies saw its stock plummet by 7.8% after reporting lower-than-expected quarterly earnings. In contrast, Omega Energy posted a modest 2.1% gain, driven by rising oil prices.
Meanwhile, commodity markets reflected a mixed sentiment. Gold futures rose by 1.5%, reaching $2,080 per ounce, as investors sought safe-haven assets. Crude oil prices continued their rally, climbing to $87.60 per barrel, supported by supply constraints and strong demand.
Financial experts are closely watching the Federal Reserve's next move, as speculation grows over potential rate hikes. The upcoming policy announcement is expected to influence investor confidence and overall market stability.
COVID-19 vaccines developed by Pfizer, Moderna, and AstraZeneca have shown high efficacy in preventing severe illness. The World Health Organization recommends vaccination for all eligible adults.
```
<Output>
entity{tuple_delimiter}Global Tech Index{tuple_delimiter}category{tuple_delimiter}The Global Tech Index tracks the performance of major technology stocks and experienced a 3.4% decline today.
entity{tuple_delimiter}Nexon Technologies{tuple_delimiter}organization{tuple_delimiter}Nexon Technologies is a tech company that saw its stock decline by 7.8% after disappointing earnings.
entity{tuple_delimiter}Omega Energy{tuple_delimiter}organization{tuple_delimiter}Omega Energy is an energy company that gained 2.1% in stock value due to rising oil prices.
entity{tuple_delimiter}Gold Futures{tuple_delimiter}product{tuple_delimiter}Gold futures rose by 1.5%, indicating increased investor interest in safe-haven assets.
entity{tuple_delimiter}Crude Oil{tuple_delimiter}product{tuple_delimiter}Crude oil prices rose to $87.60 per barrel due to supply constraints and strong demand.
entity{tuple_delimiter}Market Selloff{tuple_delimiter}category{tuple_delimiter}Market selloff refers to the significant decline in stock values due to investor concerns over interest rates and regulations.
entity{tuple_delimiter}Federal Reserve Policy Announcement{tuple_delimiter}category{tuple_delimiter}The Federal Reserve's upcoming policy announcement is expected to impact investor confidence and market stability.
relation{tuple_delimiter}Global Tech Index{tuple_delimiter}Market Selloff{tuple_delimiter}market performance, investor sentiment{tuple_delimiter}The decline in the Global Tech Index is part of the broader market selloff driven by investor concerns.
relation{tuple_delimiter}Nexon Technologies{tuple_delimiter}Global Tech Index{tuple_delimiter}company impact, index movement{tuple_delimiter}Nexon Technologies' stock decline contributed to the overall drop in the Global Tech Index.
relation{tuple_delimiter}Nexon Technologies{tuple_delimiter}Market Selloff{tuple_delimiter}tech decline, earnings impact{tuple_delimiter}Nexon Technologies was among the hardest hit in the market selloff after disappointing earnings.
relation{tuple_delimiter}Omega Energy{tuple_delimiter}Crude Oil{tuple_delimiter}energy sector, price correlation{tuple_delimiter}Omega Energy's stock gain was driven by rising crude oil prices.
relation{tuple_delimiter}Omega Energy{tuple_delimiter}Market Selloff{tuple_delimiter}market contrast, energy resilience{tuple_delimiter}Omega Energy posted gains in contrast to the broader market selloff, showing energy sector resilience.
relation{tuple_delimiter}Crude Oil{tuple_delimiter}Market Selloff{tuple_delimiter}commodity rally, market divergence{tuple_delimiter}Crude oil prices rallied while stock markets experienced a selloff, reflecting divergent market dynamics.
relation{tuple_delimiter}Gold Futures{tuple_delimiter}Market Selloff{tuple_delimiter}market reaction, safe-haven investment{tuple_delimiter}Gold prices rose as investors sought safe-haven assets during the market selloff.
relation{tuple_delimiter}Federal Reserve Policy Announcement{tuple_delimiter}Market Selloff{tuple_delimiter}interest rate impact, financial regulation{tuple_delimiter}Speculation over Federal Reserve policy changes contributed to market volatility and investor selloff.
entity{tuple_delimiter}COVID-19{tuple_delimiter}concept{tuple_delimiter}Disease that vaccines are designed to prevent.
entity{tuple_delimiter}Vaccines{tuple_delimiter}product{tuple_delimiter}Medical products developed to prevent COVID-19.
entity{tuple_delimiter}Pfizer{tuple_delimiter}organization{tuple_delimiter}Pharmaceutical company that developed a COVID-19 vaccine.
entity{tuple_delimiter}Moderna{tuple_delimiter}organization{tuple_delimiter}Pharmaceutical company that developed a COVID-19 vaccine.
entity{tuple_delimiter}AstraZeneca{tuple_delimiter}organization{tuple_delimiter}Pharmaceutical company that developed a COVID-19 vaccine.
entity{tuple_delimiter}World Health Organization{tuple_delimiter}organization{tuple_delimiter}Global health body recommending vaccination.
relation{tuple_delimiter}Pfizer{tuple_delimiter}Vaccines{tuple_delimiter}development{tuple_delimiter}Pfizer developed a COVID-19 vaccine.
relation{tuple_delimiter}Moderna{tuple_delimiter}Vaccines{tuple_delimiter}development{tuple_delimiter}Moderna developed a COVID-19 vaccine.
relation{tuple_delimiter}AstraZeneca{tuple_delimiter}Vaccines{tuple_delimiter}development{tuple_delimiter}AstraZeneca developed a COVID-19 vaccine.
relation{tuple_delimiter}Vaccines{tuple_delimiter}COVID-19{tuple_delimiter}prevention{tuple_delimiter}Vaccines prevent severe COVID-19 illness.
relation{tuple_delimiter}World Health Organization{tuple_delimiter}Vaccines{tuple_delimiter}recommendation{tuple_delimiter}WHO recommends vaccination for adults.
{completion_delimiter}
""",
# Example 3: Short legal example with hub entity (Merger)
"""<Input Text>
```
The patient presented with symptoms consistent with Type 2 diabetes, including elevated blood glucose levels and increased thirst. Dr. Martinez recommended starting metformin therapy alongside dietary modifications.
The merger between Acme Corp and Beta Industries requires Federal Trade Commission approval due to antitrust concerns.
```
<Output>
entity{tuple_delimiter}Patient{tuple_delimiter}person{tuple_delimiter}The patient presented with symptoms of Type 2 diabetes including elevated blood glucose and increased thirst.
entity{tuple_delimiter}Type 2 Diabetes{tuple_delimiter}concept{tuple_delimiter}Type 2 diabetes is a metabolic condition characterized by elevated blood glucose levels.
entity{tuple_delimiter}Dr. Martinez{tuple_delimiter}person{tuple_delimiter}Dr. Martinez is the physician who recommended treatment for the patient.
entity{tuple_delimiter}Metformin{tuple_delimiter}product{tuple_delimiter}Metformin is a medication prescribed for managing Type 2 diabetes.
entity{tuple_delimiter}Dietary Modifications{tuple_delimiter}method{tuple_delimiter}Dietary modifications are lifestyle changes recommended alongside medication.
relation{tuple_delimiter}Patient{tuple_delimiter}Type 2 Diabetes{tuple_delimiter}diagnosis, medical condition{tuple_delimiter}The patient was diagnosed with symptoms consistent with Type 2 diabetes.
relation{tuple_delimiter}Dr. Martinez{tuple_delimiter}Patient{tuple_delimiter}treatment, medical care{tuple_delimiter}Dr. Martinez provided medical recommendations to the patient.
relation{tuple_delimiter}Metformin{tuple_delimiter}Type 2 Diabetes{tuple_delimiter}treatment, medication{tuple_delimiter}Metformin is prescribed as a treatment for Type 2 diabetes.
{completion_delimiter}
""",
"""<Input Text>
```
The merger between Acme Corp and Beta Industries requires approval from the Federal Trade Commission. Legal counsel advised that the deal may face antitrust scrutiny due to market concentration concerns.
```
<Output>
entity{tuple_delimiter}Acme Corp{tuple_delimiter}organization{tuple_delimiter}Acme Corp is a company involved in a proposed merger with Beta Industries.
entity{tuple_delimiter}Beta Industries{tuple_delimiter}organization{tuple_delimiter}Beta Industries is a company involved in a proposed merger with Acme Corp.
entity{tuple_delimiter}Federal Trade Commission{tuple_delimiter}organization{tuple_delimiter}The Federal Trade Commission is the regulatory body that must approve the merger.
entity{tuple_delimiter}Antitrust Scrutiny{tuple_delimiter}concept{tuple_delimiter}Antitrust scrutiny refers to regulatory review for market concentration concerns.
relation{tuple_delimiter}Acme Corp{tuple_delimiter}Beta Industries{tuple_delimiter}merger, business deal{tuple_delimiter}Acme Corp and Beta Industries are parties to a proposed merger.
relation{tuple_delimiter}Federal Trade Commission{tuple_delimiter}Acme Corp{tuple_delimiter}regulatory approval, oversight{tuple_delimiter}The FTC must approve the merger involving Acme Corp.
relation{tuple_delimiter}Antitrust Scrutiny{tuple_delimiter}Federal Trade Commission{tuple_delimiter}regulatory process, legal review{tuple_delimiter}Antitrust scrutiny is conducted by the FTC to assess market impact.
entity{tuple_delimiter}Merger{tuple_delimiter}event{tuple_delimiter}Proposed business combination between Acme Corp and Beta Industries.
entity{tuple_delimiter}Acme Corp{tuple_delimiter}organization{tuple_delimiter}Company involved in proposed merger.
entity{tuple_delimiter}Beta Industries{tuple_delimiter}organization{tuple_delimiter}Company involved in proposed merger.
entity{tuple_delimiter}Federal Trade Commission{tuple_delimiter}organization{tuple_delimiter}Regulatory body that must approve the merger.
relation{tuple_delimiter}Acme Corp{tuple_delimiter}Merger{tuple_delimiter}party to{tuple_delimiter}Acme Corp is party to the merger.
relation{tuple_delimiter}Beta Industries{tuple_delimiter}Merger{tuple_delimiter}party to{tuple_delimiter}Beta Industries is party to the merger.
relation{tuple_delimiter}Federal Trade Commission{tuple_delimiter}Merger{tuple_delimiter}regulatory approval{tuple_delimiter}FTC must approve the merger.
{completion_delimiter}
""",
]
PROMPTS['summarize_entity_descriptions'] = """---Role---
You are a Knowledge Graph Specialist, proficient in data curation and synthesis.
PROMPTS['summarize_entity_descriptions'] = """---Task---
Merge multiple descriptions of "{description_name}" ({description_type}) into one comprehensive summary.
---Task---
Your task is to synthesize a list of descriptions of a given entity or relation into a single, comprehensive, and cohesive summary.
---Instructions---
1. Input Format: The description list is provided in JSON format. Each JSON object (representing a single description) appears on a new line within the `Description List` section.
2. Output Format: The merged description will be returned as plain text, presented in multiple paragraphs, without any additional formatting or extraneous comments before or after the summary.
3. Comprehensiveness: The summary must integrate all key information from *every* provided description. Do not omit any important facts or details.
4. Clarity: Write from an objective, third-person perspective and explicitly mention the full name of the entity or relation at the beginning for immediate context.
5. Conflict Handling:
- In cases of conflicting or inconsistent descriptions, first determine if these conflicts arise from multiple, distinct entities or relationships that share the same name.
- If distinct entities/relations are identified, summarize each one *separately* within the overall output.
- If conflicts within a single entity/relation (e.g., historical discrepancies) exist, attempt to reconcile them or present both viewpoints with noted uncertainty.
6. Length Constraint: The summary's total length must not exceed {summary_length} tokens while still maintaining depth and completeness.
7. Language: Write the entire output in {language}. Retain proper nouns (e.g., personal names, place names, organization names) in their original language if a clear, widely accepted translation is unavailable.
---Input---
{description_type} Name: {description_name}
Description List:
Rules:
- Plain text output only, no formatting or extra text
- Include ALL key facts from every description
- Third person, mention entity name at start
- Max {summary_length} tokens
- Output in {language}; keep proper nouns in original language
- If descriptions conflict: reconcile or note uncertainty
Descriptions:
```
{description_list}
```
---Output---
"""
Output:"""
PROMPTS['fail_response'] = "Sorry, I'm not able to provide an answer to that question.[no-context]"
@ -281,38 +199,20 @@ STRICT GROUNDING:
"""
# Default naive RAG response prompt - cite-ready (no LLM-generated citations)
PROMPTS['naive_rag_response'] = """---Role---
PROMPTS['naive_rag_response'] = """---Task---
Answer the query using ONLY the provided context.
You are an expert AI assistant synthesizing information from a knowledge base.
---Goal---
Generate a comprehensive, well-structured answer to the user query using ONLY information from the provided Document Chunks.
---Instructions---
1. **Cite-Ready Writing Style**:
- Write each factual claim as a distinct, complete sentence
- DO NOT include citation markers like [1], [2], or footnote references
- DO NOT add a References section - citations will be added automatically by the system
- Each sentence should be traceable to specific information in the context
2. **Content & Grounding**:
- Use ONLY information from the provided context
- DO NOT invent, assume, or infer any information not explicitly stated
- If the answer cannot be found in the context, state that clearly
- CRITICAL: Verify each fact appears EXACTLY in the provided context before stating it
3. **Formatting**:
- The response MUST be in the same language as the user query
- Use Markdown formatting for clarity (headings, bullet points, bold)
- The response should be presented in {response_type}
4. Additional Instructions: {user_prompt}
Rules:
- NO citation markers ([1], [2]) - added automatically
- NO References section - added automatically
- Each factual claim as distinct, traceable sentence
- If not in context, say so clearly
- Match query language; use Markdown formatting
- Response type: {response_type}
{user_prompt}
---Context---
{content_data}
"""
@ -355,110 +255,71 @@ Reference Document List (Each entry starts with a [reference_id] that correspond
"""
PROMPTS['keywords_extraction'] = """---Role---
You are an expert keyword extractor, specializing in analyzing user queries for a Retrieval-Augmented Generation (RAG) system. Your purpose is to identify both high-level and low-level keywords in the user's query that will be used for effective document retrieval.
PROMPTS['keywords_extraction'] = """---Task---
Extract keywords from the query for RAG retrieval.
---Goal---
Given a user query, your task is to extract two distinct types of keywords:
1. **high_level_keywords**: for overarching concepts or themes, capturing user's core intent, the subject area, or the type of question being asked.
2. **low_level_keywords**: for specific entities or details, identifying the specific entities, proper nouns, technical jargon, product names, or concrete items.
Output valid JSON (no markdown):
{{"high_level_keywords": [...], "low_level_keywords": [...]}}
---Instructions & Constraints---
1. **Output Format**: Your output MUST be a valid JSON object and nothing else. Do not include any explanatory text, markdown code fences (like ```json), or any other text before or after the JSON. It will be parsed directly by a JSON parser.
2. **Source of Truth**: Derive all keywords explicitly from the user query. Populate both keyword lists when the query contains meaningful content; if the query is trivial or nonsensical, return empty lists (see edge cases).
3. **Concise & Meaningful**: Keywords should be concise words or meaningful phrases. Prioritize multi-word phrases when they represent a single concept. For example, from "latest financial report of Apple Inc.", you should extract "latest financial report" and "Apple Inc." rather than "latest", "financial", "report", and "Apple".
4. **Handle Edge Cases**: For queries that are too simple, vague, or nonsensical (e.g., "hello", "ok", "asdfghjkl"), you must return a JSON object with empty lists for both keyword types.
Guidelines:
- high_level: Topic categories, question types, abstract themes
- low_level: Specific terms from the query (entities, technical terms, key concepts)
- Extract at least 1 keyword per category for meaningful queries
- Only return empty lists for trivial or nonsensical input (e.g., "asdfgh", "hello")
---Examples---
{examples}
---Real Data---
User Query: {query}
---Query---
{query}
---Output---
Output:"""
PROMPTS['keywords_extraction_examples'] = [
"""Example 1:
Query: "How does international trade influence global economic stability?"
Output:
{
"high_level_keywords": ["International trade", "Global economic stability", "Economic impact"],
"low_level_keywords": ["Trade agreements", "Tariffs", "Currency exchange", "Imports", "Exports"]
}
"""Query: "What is the capital of France?"
Output: {{"high_level_keywords": ["Geography", "Capital city"], "low_level_keywords": ["France"]}}
""",
"""Example 2:
Query: "What are the environmental consequences of deforestation on biodiversity?"
Output:
{
"high_level_keywords": ["Environmental consequences", "Deforestation", "Biodiversity loss"],
"low_level_keywords": ["Species extinction", "Habitat destruction", "Carbon emissions", "Rainforest", "Ecosystem"]
}
"""Query: "Why does inflation affect interest rates?"
Output: {{"high_level_keywords": ["Economics", "Cause-effect"], "low_level_keywords": ["inflation", "interest rates"]}}
""",
"""Example 3:
Query: "What is the role of education in reducing poverty?"
Output:
{
"high_level_keywords": ["Education", "Poverty reduction", "Socioeconomic development"],
"low_level_keywords": ["School access", "Literacy rates", "Job training", "Income inequality"]
}
"""Query: "How does Python compare to JavaScript for web development?"
Output: {{"high_level_keywords": ["Programming languages", "Comparison"], "low_level_keywords": ["Python", "JavaScript"]}}
""",
]
PROMPTS['orphan_connection_validation'] = """---Role---
You are a Knowledge Graph Quality Specialist. Your task is to evaluate whether a proposed relationship between two entities is meaningful and should be added to a knowledge graph.
PROMPTS['orphan_connection_validation'] = """---Task---
Evaluate if a meaningful relationship exists between two entities.
---Context---
An orphan entity (entity with no connections) has been identified. Vector similarity search found a potentially related entity. You must determine if a genuine, meaningful relationship exists between them.
Orphan: {orphan_name} ({orphan_type}) - {orphan_description}
Candidate: {candidate_name} ({candidate_type}) - {candidate_description}
Similarity: {similarity_score}
---Input---
**Orphan Entity:**
- Name: {orphan_name}
- Type: {orphan_type}
- Description: {orphan_description}
Valid relationship types:
- Direct: One uses/creates/owns the other
- Industry: Both operate in same sector (finance, tech, healthcare)
- Competitive: Direct competitors or alternatives
- Temporal: Versions, successors, or historical connections
- Dependency: One relies on/runs on the other
**Candidate Entity:**
- Name: {candidate_name}
- Type: {candidate_type}
- Description: {candidate_description}
Confidence levels (use these exact labels):
- HIGH: Direct/explicit relationship (Django is Python framework, iOS is Apple product)
- MEDIUM: Strong implicit or industry relationship (Netflix runs on AWS, Bitcoin and Visa both in payments)
- LOW: Very weak, tenuous connection
- NONE: No logical relationship
**Vector Similarity Score:** {similarity_score}
Output valid JSON:
{{"should_connect": bool, "confidence": "HIGH"|"MEDIUM"|"LOW"|"NONE", "relationship_type": str|null, "relationship_keywords": str|null, "relationship_description": str|null, "reasoning": str}}
---Instructions---
1. Analyze both entities carefully based on their names, types, and descriptions.
2. Determine if there is a genuine, meaningful relationship between them. Consider:
- Direct relationships (interaction, causation, membership)
- Categorical relationships (same domain, field, or category)
- Thematic relationships (shared concepts, contexts, or subject matter)
- Hierarchical relationships (part-of, type-of, related-to)
3. If a relationship exists, describe it and provide your confidence level.
4. If NO meaningful relationship exists, state this clearly. High vector similarity alone is NOT sufficient - entities must have a logical, describable connection.
Rules:
- HIGH/MEDIUM: should_connect=true (same industry = MEDIUM)
- LOW/NONE: should_connect=false
- High similarity alone is NOT sufficient
- Explain the specific relationship in reasoning
---Output Format---
Your response MUST be a valid JSON object with exactly these fields:
{{
"should_connect": true/false,
"confidence": 0.0-1.0,
"relationship_type": "type of relationship or null",
"relationship_keywords": "comma-separated keywords or null",
"relationship_description": "description of the relationship or null",
"reasoning": "brief explanation of your decision"
}}
Example: Python → Django
{{"should_connect": true, "confidence": "HIGH", "relationship_type": "direct", "relationship_keywords": "framework, built-with", "relationship_description": "Django is a web framework written in Python", "reasoning": "Direct explicit relationship - Django is implemented in Python"}}
---Decision Guidelines---
- `should_connect: true` ONLY if you can articulate a clear, logical relationship
- `confidence >= 0.7` required for connection to be created
- High similarity + no logical connection = should_connect: false
- When in doubt, reject the connection (orphans are better than garbage connections)
Example: Mozart → Docker
{{"should_connect": false, "confidence": "NONE", "relationship_type": null, "relationship_keywords": null, "relationship_description": null, "reasoning": "No logical connection between classical composer and container technology"}}
---Output---
"""
Output:"""

View file

@ -1,393 +0,0 @@
"""
Optimized prompts for LightRAG entity extraction.
Contains two variants:
- PROMPTS_OPTIMIZED: Aggressive optimization (55% token savings, sparser graphs)
- PROMPTS_BALANCED: Moderate optimization (40% token savings, richer graphs)
"""
from __future__ import annotations
from typing import Any
# Prompt registry for the aggressively optimized variant, created already
# holding its default tuple and completion delimiters.
PROMPTS_OPTIMIZED: dict[str, Any] = {
    'DEFAULT_TUPLE_DELIMITER': '<|#|>',
    'DEFAULT_COMPLETION_DELIMITER': '<|COMPLETE|>',
}
# Prompt registry for the balanced variant; starts empty.
PROMPTS_BALANCED: dict[str, Any] = {}
# =============================================================================
# OPTIMIZED: Entity Extraction System Prompt
# Original: 1,375 tokens | Target: ~850 tokens (~38% reduction)
# Changes:
# - Consolidated format instructions (removed 3x repetition)
# - Removed delimiter usage section (examples demonstrate it)
# - Merged naming conventions into single statement
# - Tightened language throughout
# =============================================================================
# System prompt template for entity/relationship extraction (optimized variant).
# Contains the placeholders {tuple_delimiter}, {completion_delimiter},
# {entity_types}, {language}, {examples} and {input_text}; presumably these are
# filled in by the caller before the prompt is sent - confirm against the caller.
PROMPTS_OPTIMIZED['entity_extraction_system_prompt'] = """---Role---
You are a Knowledge Graph Specialist extracting entities and relationships from text.
---Output Format---
Output raw lines only—NO markdown, NO headers, NO backticks around lines.
Entity format (4 fields per line):
entity{tuple_delimiter}name{tuple_delimiter}type{tuple_delimiter}description
Relation format (5 fields per line):
relation{tuple_delimiter}source{tuple_delimiter}target{tuple_delimiter}keywords{tuple_delimiter}description
Use Title Case for entity names. Separate multiple keywords with commas (not {tuple_delimiter}).
Output all entities first, then relationships. End with {completion_delimiter}.
---Entity Extraction---
- Extract clearly defined, meaningful entities
- Types: `{entity_types}` (use `Other` if none fit)
- Description: concise summary based only on text content
---Relationship Extraction---
Extract meaningful relationships between entities:
- **Direct:** explicit interactions, actions, connections
- **Comparative:** entities grouped, ranked, or compared together
- **Hierarchical:** part-of, member-of, type-of connections
- **Causal:** explicit cause-effect relationships
- **Categorical:** entities sharing explicit group membership
For N-ary relationships (3+ entities), decompose into binary pairs.
Relationships are undirected; avoid duplicates.
Do NOT invent speculative connections—only extract what text supports.
---Guidelines---
- Write in third person; avoid pronouns like "this article", "I", "you"
- Output in `{language}`. Keep proper nouns in original language.
- Prioritize most significant relationships first.
---Examples---
{examples}
---Input---
Entity_types: [{entity_types}]
Text:
```
{input_text}
```
"""
# =============================================================================
# OPTIMIZED: Entity Extraction Examples
# Original: 2,204 tokens (4 examples) | Target: ~1,300 tokens (2-3 examples)
# Changes:
# - Kept Example 1 (narrative): demonstrates character extraction
# - Condensed Example 2 (financial): demonstrates factual/numeric extraction
# - Kept Example 4 (legal): short, demonstrates org relationships
# - Removed Example 3 (medical): redundant with Example 4 pattern
# =============================================================================
# Few-shot examples for the optimized extraction prompt. Every description must
# be supported by its own <Input Text>, because the system prompt instructs the
# model to base descriptions only on text content.
PROMPTS_OPTIMIZED['entity_extraction_examples'] = [
    # Example 1: Narrative text with characters and abstract concepts
    """<Input Text>
```
while Alex clenched his jaw, the buzz of frustration dull against the backdrop of Taylor's authoritarian certainty. It was this competitive undercurrent that kept him alert, the sense that his and Jordan's shared commitment to discovery was an unspoken rebellion against Cruz's narrowing vision of control and order.
Then Taylor did something unexpected. They paused beside Jordan and, for a moment, observed the device with something akin to reverence. "If this tech can be understood..." Taylor said, their voice quieter, "It could change the game for us. For all of us."
Jordan looked up, and for a fleeting heartbeat, their eyes locked with Taylor's, a wordless clash of wills softening into an uneasy truce.
```
<Output>
entity{tuple_delimiter}Alex{tuple_delimiter}person{tuple_delimiter}Alex experiences frustration and observes dynamics among other characters.
entity{tuple_delimiter}Taylor{tuple_delimiter}person{tuple_delimiter}Taylor shows authoritarian certainty but displays reverence toward a device.
entity{tuple_delimiter}Jordan{tuple_delimiter}person{tuple_delimiter}Jordan shares commitment to discovery and has a significant interaction with Taylor.
entity{tuple_delimiter}Cruz{tuple_delimiter}person{tuple_delimiter}Cruz holds a vision of control and order that others rebel against.
entity{tuple_delimiter}The Device{tuple_delimiter}equipment{tuple_delimiter}Central object with game-changing potential, revered by Taylor.
relation{tuple_delimiter}Alex{tuple_delimiter}Jordan{tuple_delimiter}shared goals, rebellion{tuple_delimiter}Alex and Jordan share commitment to discovery, contrasting with Cruz's vision.
relation{tuple_delimiter}Taylor{tuple_delimiter}Jordan{tuple_delimiter}conflict resolution, mutual respect{tuple_delimiter}Taylor and Jordan reach an uneasy truce regarding the device.
relation{tuple_delimiter}Jordan{tuple_delimiter}Cruz{tuple_delimiter}ideological conflict{tuple_delimiter}Jordan's discovery commitment rebels against Cruz's control vision.
relation{tuple_delimiter}Taylor{tuple_delimiter}The Device{tuple_delimiter}reverence, significance{tuple_delimiter}Taylor shows reverence toward the device's potential impact.
{completion_delimiter}
""",
    # Example 2: Factual/financial text with metrics and organizations
    """<Input Text>
```
Stock markets faced a sharp downturn as tech giants saw significant declines, with the global tech index dropping 3.4%. Nexon Technologies saw its stock plummet 7.8% after lower-than-expected earnings. In contrast, Omega Energy posted a 2.1% gain driven by rising oil prices.
Gold futures rose 1.5% to $2,080/oz as investors sought safe-haven assets. Crude oil climbed to $87.60/barrel. The Federal Reserve's upcoming policy announcement is expected to influence market stability.
```
<Output>
entity{tuple_delimiter}Global Tech Index{tuple_delimiter}category{tuple_delimiter}Tracks major tech stocks; dropped 3.4% today.
entity{tuple_delimiter}Nexon Technologies{tuple_delimiter}organization{tuple_delimiter}Tech company whose stock fell 7.8% after disappointing earnings.
entity{tuple_delimiter}Omega Energy{tuple_delimiter}organization{tuple_delimiter}Energy company that gained 2.1% due to rising oil prices.
entity{tuple_delimiter}Gold Futures{tuple_delimiter}product{tuple_delimiter}Rose 1.5% to $2,080/oz as safe-haven investment.
entity{tuple_delimiter}Crude Oil{tuple_delimiter}product{tuple_delimiter}Climbed to $87.60/barrel.
entity{tuple_delimiter}Federal Reserve{tuple_delimiter}organization{tuple_delimiter}Central bank whose policy announcement may impact markets.
relation{tuple_delimiter}Nexon Technologies{tuple_delimiter}Global Tech Index{tuple_delimiter}component, decline{tuple_delimiter}Nexon's decline contributed to the tech index drop.
relation{tuple_delimiter}Omega Energy{tuple_delimiter}Crude Oil{tuple_delimiter}price correlation{tuple_delimiter}Omega's gain driven by rising crude oil prices.
relation{tuple_delimiter}Gold Futures{tuple_delimiter}Global Tech Index{tuple_delimiter}inverse correlation, safe-haven{tuple_delimiter}Gold rose as tech stocks fell, reflecting flight to safety.
relation{tuple_delimiter}Federal Reserve{tuple_delimiter}Global Tech Index{tuple_delimiter}policy impact{tuple_delimiter}Fed policy expectations influence market stability.
{completion_delimiter}
""",
    # Example 3: Short legal/regulatory text
    """<Input Text>
```
The merger between Acme Corp and Beta Industries requires Federal Trade Commission approval. Legal counsel advised the deal may face antitrust scrutiny due to market concentration concerns.
```
<Output>
entity{tuple_delimiter}Acme Corp{tuple_delimiter}organization{tuple_delimiter}Company proposing merger with Beta Industries.
entity{tuple_delimiter}Beta Industries{tuple_delimiter}organization{tuple_delimiter}Company proposing merger with Acme Corp.
entity{tuple_delimiter}Federal Trade Commission{tuple_delimiter}organization{tuple_delimiter}Regulatory body that must approve the merger.
entity{tuple_delimiter}Antitrust Scrutiny{tuple_delimiter}concept{tuple_delimiter}Regulatory review for market concentration concerns.
relation{tuple_delimiter}Acme Corp{tuple_delimiter}Beta Industries{tuple_delimiter}merger{tuple_delimiter}Companies are parties to a proposed merger.
relation{tuple_delimiter}Federal Trade Commission{tuple_delimiter}Acme Corp{tuple_delimiter}regulatory approval{tuple_delimiter}FTC must approve the merger.
relation{tuple_delimiter}Antitrust Scrutiny{tuple_delimiter}Federal Trade Commission{tuple_delimiter}regulatory process{tuple_delimiter}FTC conducts antitrust review to assess market impact.
{completion_delimiter}
""",
]
# =============================================================================
# OPTIMIZED: User Prompt
# Original: 174 tokens | Target: ~100 tokens
# =============================================================================
# User-turn prompt that triggers extraction (optimized variant). Uses the
# {completion_delimiter} and {language} placeholders and ends with an open
# <Output> tag for the model to continue from.
PROMPTS_OPTIMIZED['entity_extraction_user_prompt'] = """---Task---
Extract entities and relationships from the text above.
Follow the system prompt format exactly. Output only the extraction list—no explanations.
End with `{completion_delimiter}`. Output in {language}; keep proper nouns in original language.
<Output>
"""
# =============================================================================
# OPTIMIZED: Continue Extraction Prompt
# Original: 499 tokens | Target: ~250 tokens
# =============================================================================
# Follow-up prompt asking the model to review its previous extraction and emit
# only new or corrected items (optimized variant). Uses the
# {completion_delimiter} and {language} placeholders.
PROMPTS_OPTIMIZED['entity_continue_extraction_user_prompt'] = """---Task---
Review your extraction for missed or incorrectly formatted entities/relationships.
**Focus on:**
1. Orphan entities (no relationships)—check if text groups, compares, or relates them
2. Missed relationships from lists, rankings, or shared contexts
3. Formatting errors (wrong field count, missing delimiter)
**Rules:**
- Do NOT re-output correctly extracted items
- Only output new or corrected items
- End with `{completion_delimiter}`
- Output in {language}
<Output>
"""
# =============================================================================
# COMPARISON HELPER
# =============================================================================
def compare_token_counts():
    """Print token counts for the original prompts versus the optimized ones.

    Each prompt is encoded with the tiktoken encoding for ``gpt-4``. For the
    system, user and continue prompts, for the joined example lists, and for
    the grand total, the function prints the original count, the optimized
    count, and the savings as an absolute number and as a percentage of the
    original.

    ``tiktoken`` and ``lightrag.prompt`` are imported inside the function, so
    they are only needed when the comparison is actually run.

    Returns:
        None. The report is written to standard output.
    """
    import tiktoken

    from lightrag.prompt import PROMPTS

    enc = tiktoken.encoding_for_model("gpt-4")

    def count_tokens(text):
        """Return the number of tokens ``text`` encodes to."""
        return len(enc.encode(text))

    def percent_saved(saved, original):
        """Return ``saved`` as a percentage of ``original`` (0.0 if empty)."""
        # An empty original prompt has zero tokens; report 0% rather than
        # raising ZeroDivisionError.
        return (saved / original) * 100 if original else 0.0

    print("=== Token Comparison: Original vs Optimized ===\n")

    comparisons = [
        ('entity_extraction_system_prompt', 'System Prompt'),
        ('entity_extraction_user_prompt', 'User Prompt'),
        ('entity_continue_extraction_user_prompt', 'Continue Prompt'),
    ]

    total_orig = 0
    total_opt = 0

    for key, name in comparisons:
        orig_tokens = count_tokens(PROMPTS[key])
        opt_tokens = count_tokens(PROMPTS_OPTIMIZED[key])
        savings = orig_tokens - opt_tokens
        pct = percent_saved(savings, orig_tokens)

        total_orig += orig_tokens
        total_opt += opt_tokens

        print(f"{name}:")
        print(f" Original: {orig_tokens:,} tokens")
        print(f" Optimized: {opt_tokens:,} tokens")
        print(f" Savings: {savings:,} tokens ({pct:.1f}%)\n")

    # The examples are lists of strings; join each list with newlines so it is
    # counted as one block of text.
    orig_example_list = PROMPTS['entity_extraction_examples']
    opt_example_list = PROMPTS_OPTIMIZED['entity_extraction_examples']
    orig_ex_tokens = count_tokens('\n'.join(orig_example_list))
    opt_ex_tokens = count_tokens('\n'.join(opt_example_list))
    ex_savings = orig_ex_tokens - opt_ex_tokens
    ex_pct = percent_saved(ex_savings, orig_ex_tokens)

    total_orig += orig_ex_tokens
    total_opt += opt_ex_tokens

    # Report the real number of examples instead of hard-coded counts, so the
    # output stays correct when either list changes.
    print("Examples:")
    print(f" Original: {orig_ex_tokens:,} tokens ({len(orig_example_list)} examples)")
    print(f" Optimized: {opt_ex_tokens:,} tokens ({len(opt_example_list)} examples)")
    print(f" Savings: {ex_savings:,} tokens ({ex_pct:.1f}%)\n")

    total_savings = total_orig - total_opt
    total_pct = percent_saved(total_savings, total_orig)

    print("=" * 50)
    print("TOTAL:")
    print(f" Original: {total_orig:,} tokens")
    print(f" Optimized: {total_opt:,} tokens")
    print(f" Savings: {total_savings:,} tokens ({total_pct:.1f}%)")
# =============================================================================
# BALANCED: Entity Extraction Prompts
# Target: ~40% token savings while maintaining rich extraction
# Key difference: Explicitly encourages conceptual/abstract entities
# =============================================================================
# Register the balanced variant's default tuple and completion delimiters.
PROMPTS_BALANCED.update(
    {
        'DEFAULT_TUPLE_DELIMITER': '<|#|>',
        'DEFAULT_COMPLETION_DELIMITER': '<|COMPLETE|>',
    }
)
# System prompt template for entity/relationship extraction (balanced variant).
# Unlike the optimized variant it explicitly asks for abstract entities and
# intermediate concept entities. Contains the placeholders {tuple_delimiter},
# {completion_delimiter}, {entity_types}, {language}, {examples} and
# {input_text}; presumably filled in by the caller - confirm against the caller.
PROMPTS_BALANCED['entity_extraction_system_prompt'] = """---Role---
You are a Knowledge Graph Specialist extracting entities and relationships from text.
---Output Format---
Output raw lines only—NO markdown, NO headers, NO backticks.
Entity: entity{tuple_delimiter}name{tuple_delimiter}type{tuple_delimiter}description
Relation: relation{tuple_delimiter}source{tuple_delimiter}target{tuple_delimiter}keywords{tuple_delimiter}description
Use Title Case for names. Separate keywords with commas. Output entities first, then relations. End with {completion_delimiter}.
---Entity Extraction---
Extract BOTH concrete and abstract entities:
- **Concrete:** Named people, organizations, places, products, dates
- **Abstract:** Concepts, events, categories, processes mentioned in text (e.g., "market selloff", "merger", "pandemic")
Types: `{entity_types}` (use `Other` if none fit)
---Relationship Extraction---
Extract meaningful relationships:
- **Direct:** explicit interactions, actions, connections
- **Categorical:** entities sharing group membership or classification
- **Causal:** cause-effect relationships
- **Hierarchical:** part-of, member-of, type-of
Create intermediate concept entities when they help connect related items (e.g., "Vaccines" connecting Pfizer/Moderna/AstraZeneca).
For N-ary relationships, decompose into binary pairs. Avoid duplicates.
---Guidelines---
- Third person only; no pronouns like "this article", "I", "you"
- Output in `{language}`. Keep proper nouns in original language.
---Examples---
{examples}
---Input---
Entity_types: [{entity_types}]
Text:
```
{input_text}
```
"""
# Few-shot examples for the balanced extraction prompt. Each entry is one
# template string with an <Input Text> section followed by the expected
# <Output>, written with the {tuple_delimiter} and {completion_delimiter}
# placeholders.
PROMPTS_BALANCED['entity_extraction_examples'] = [
# Example 1: abstract concept extraction - "Market Selloff" is an event entity
# that every relation in the expected output connects to (hub pattern).
"""<Input Text>
```
Stock markets faced a sharp downturn as tech giants saw significant declines, with the global tech index dropping 3.4%. Nexon Technologies saw its stock plummet 7.8% after lower-than-expected earnings. In contrast, Omega Energy posted a 2.1% gain driven by rising oil prices.
Gold futures rose 1.5% to $2,080/oz as investors sought safe-haven assets. The Federal Reserve's upcoming policy announcement is expected to influence market stability.
```
<Output>
entity{tuple_delimiter}Market Selloff{tuple_delimiter}event{tuple_delimiter}Significant decline in stock values due to investor concerns.
entity{tuple_delimiter}Global Tech Index{tuple_delimiter}category{tuple_delimiter}Tracks major tech stocks; dropped 3.4% today.
entity{tuple_delimiter}Nexon Technologies{tuple_delimiter}organization{tuple_delimiter}Tech company whose stock fell 7.8% after disappointing earnings.
entity{tuple_delimiter}Omega Energy{tuple_delimiter}organization{tuple_delimiter}Energy company that gained 2.1% due to rising oil prices.
entity{tuple_delimiter}Gold Futures{tuple_delimiter}product{tuple_delimiter}Rose 1.5% to $2,080/oz as safe-haven investment.
entity{tuple_delimiter}Federal Reserve{tuple_delimiter}organization{tuple_delimiter}Central bank whose policy may impact markets.
relation{tuple_delimiter}Global Tech Index{tuple_delimiter}Market Selloff{tuple_delimiter}market decline{tuple_delimiter}Tech index drop is part of broader selloff.
relation{tuple_delimiter}Nexon Technologies{tuple_delimiter}Market Selloff{tuple_delimiter}tech decline{tuple_delimiter}Nexon among hardest hit in selloff.
relation{tuple_delimiter}Omega Energy{tuple_delimiter}Market Selloff{tuple_delimiter}contrast, resilience{tuple_delimiter}Omega gained while broader market sold off.
relation{tuple_delimiter}Gold Futures{tuple_delimiter}Market Selloff{tuple_delimiter}safe-haven{tuple_delimiter}Gold rose as investors fled stocks.
relation{tuple_delimiter}Federal Reserve{tuple_delimiter}Market Selloff{tuple_delimiter}policy impact{tuple_delimiter}Fed policy expectations contributed to volatility.
{completion_delimiter}
""",
# Example 2: intermediate entity - "Vaccines" appears in every relation and
# links the three developers, COVID-19 and the WHO.
"""<Input Text>
```
COVID-19 vaccines developed by Pfizer, Moderna, and AstraZeneca have shown high efficacy in preventing severe illness. The World Health Organization recommends vaccination for all eligible adults.
```
<Output>
entity{tuple_delimiter}COVID-19{tuple_delimiter}concept{tuple_delimiter}Disease that vaccines are designed to prevent.
entity{tuple_delimiter}Vaccines{tuple_delimiter}product{tuple_delimiter}Medical products developed to prevent COVID-19.
entity{tuple_delimiter}Pfizer{tuple_delimiter}organization{tuple_delimiter}Pharmaceutical company that developed a COVID-19 vaccine.
entity{tuple_delimiter}Moderna{tuple_delimiter}organization{tuple_delimiter}Pharmaceutical company that developed a COVID-19 vaccine.
entity{tuple_delimiter}AstraZeneca{tuple_delimiter}organization{tuple_delimiter}Pharmaceutical company that developed a COVID-19 vaccine.
entity{tuple_delimiter}World Health Organization{tuple_delimiter}organization{tuple_delimiter}Global health body recommending vaccination.
relation{tuple_delimiter}Pfizer{tuple_delimiter}Vaccines{tuple_delimiter}development{tuple_delimiter}Pfizer developed a COVID-19 vaccine.
relation{tuple_delimiter}Moderna{tuple_delimiter}Vaccines{tuple_delimiter}development{tuple_delimiter}Moderna developed a COVID-19 vaccine.
relation{tuple_delimiter}AstraZeneca{tuple_delimiter}Vaccines{tuple_delimiter}development{tuple_delimiter}AstraZeneca developed a COVID-19 vaccine.
relation{tuple_delimiter}Vaccines{tuple_delimiter}COVID-19{tuple_delimiter}prevention{tuple_delimiter}Vaccines prevent severe COVID-19 illness.
relation{tuple_delimiter}World Health Organization{tuple_delimiter}Vaccines{tuple_delimiter}recommendation{tuple_delimiter}WHO recommends vaccination for adults.
{completion_delimiter}
""",
# Example 3: short legal text - the merger itself is modelled as an event
# entity that both companies and the regulator relate to.
"""<Input Text>
```
The merger between Acme Corp and Beta Industries requires Federal Trade Commission approval due to antitrust concerns.
```
<Output>
entity{tuple_delimiter}Merger{tuple_delimiter}event{tuple_delimiter}Proposed business combination between Acme Corp and Beta Industries.
entity{tuple_delimiter}Acme Corp{tuple_delimiter}organization{tuple_delimiter}Company involved in proposed merger.
entity{tuple_delimiter}Beta Industries{tuple_delimiter}organization{tuple_delimiter}Company involved in proposed merger.
entity{tuple_delimiter}Federal Trade Commission{tuple_delimiter}organization{tuple_delimiter}Regulatory body that must approve the merger.
relation{tuple_delimiter}Acme Corp{tuple_delimiter}Merger{tuple_delimiter}party to{tuple_delimiter}Acme Corp is party to the merger.
relation{tuple_delimiter}Beta Industries{tuple_delimiter}Merger{tuple_delimiter}party to{tuple_delimiter}Beta Industries is party to the merger.
relation{tuple_delimiter}Federal Trade Commission{tuple_delimiter}Merger{tuple_delimiter}regulatory approval{tuple_delimiter}FTC must approve the merger.
{completion_delimiter}
""",
]
# User-turn prompt that triggers extraction (balanced variant); it repeats the
# request for abstract concepts/events. Uses the {completion_delimiter} and
# {language} placeholders and ends with an open <Output> tag.
PROMPTS_BALANCED['entity_extraction_user_prompt'] = """---Task---
Extract entities and relationships from the text. Include both concrete entities AND abstract concepts/events.
Follow format exactly. Output only extractions—no explanations. End with `{completion_delimiter}`.
Output in {language}; keep proper nouns in original language.
<Output>
"""
# Follow-up prompt asking the model to review its previous extraction (balanced
# variant): look for hub-like abstract concepts, orphan entities and formatting
# errors, and output only new or corrected items. Uses the
# {completion_delimiter} and {language} placeholders.
PROMPTS_BALANCED['entity_continue_extraction_user_prompt'] = """---Task---
Review extraction for missed entities/relationships.
Check for:
1. Abstract concepts that could serve as hubs (events, categories, processes)
2. Orphan entities that need connections
3. Formatting errors
Only output NEW or CORRECTED items. End with `{completion_delimiter}`. Output in {language}.
<Output>
"""
# Script entry point: print the token comparison when the module is run directly.
if __name__ == "__main__":
    compare_token_counts()

View file

@ -10,7 +10,7 @@ import sys
from pathlib import Path
def download_tiktoken_cache(cache_dir: str | None = None, models: list | None = None):
def download_tiktoken_cache(cache_dir: str | None = None, models: list[str] | None = None):
"""Download tiktoken models to local cache
Args:

View file

@ -1,16 +1,5 @@
import networkx as nx
import numpy as np
import pipmaster as pm
# Added automatic libraries install using pipmaster
if not pm.is_installed('moderngl'):
pm.install('moderngl')
if not pm.is_installed('imgui_bundle'):
pm.install('imgui_bundle')
if not pm.is_installed('pyglm'):
pm.install('pyglm')
if not pm.is_installed('python-louvain'):
pm.install('python-louvain')
import colorsys
import os

View file

@ -141,7 +141,7 @@ class MigrationTool:
import configparser
config = configparser.ConfigParser()
config.read('config.ini', 'utf-8')
config.read('config.ini', encoding='utf-8')
if storage_name == 'RedisKVStorage':
return config.has_option('redis', 'uri')

View file

@ -680,7 +680,7 @@ async def aedit_relation(
compute_mdhash_id(target_entity + source_entity, prefix='rel-'),
]
await relationships_vdb.delete(rel_ids_to_delete)
logger.debug(f'Relation Delete: delete vdb for `{source_entity}`~`{target_entity}`')
logger.debug(f'Relation Edit: delete vdb for `{source_entity}`~`{target_entity}`')
# 2. Update relation information in the graph
new_edge_data = {**edge_data, **updated_data}
@ -764,7 +764,7 @@ async def aedit_relation(
}
)
logger.info(f'Relation Delete: update chunk tracking for `{source_entity}`~`{target_entity}`')
logger.info(f'Relation Edit: update chunk tracking for `{source_entity}`~`{target_entity}`')
# 5. Save changes
await _persist_graph_updates(
@ -773,7 +773,7 @@ async def aedit_relation(
relation_chunks_storage=relation_chunks_storage,
)
logger.info(f"Relation Delete: `{source_entity}`~`{target_entity}`' successfully updated")
logger.info(f"Relation Edit: `{source_entity}`~`{target_entity}` successfully updated")
return await get_relation_info(
chunk_entity_relation_graph,
relationships_vdb,
@ -1219,7 +1219,7 @@ async def _merge_entities_impl(
}
# Apply relationship updates
logger.info(f'Entity Merge: updatign {len(relation_updates)} relations')
logger.info(f'Entity Merge: updating {len(relation_updates)} relations')
for rel_data in relation_updates.values():
await chunk_entity_relation_graph.upsert_edge(rel_data['graph_src'], rel_data['graph_tgt'], rel_data['data'])
logger.info(f'Entity Merge: updating relation `{rel_data["graph_src"]}`~`{rel_data["graph_tgt"]}`')

View file

@ -47,7 +47,6 @@ pytest = [
"pytest-asyncio>=1.2.0",
"pre-commit",
"ruff",
"ty",
]
api = [
@ -137,7 +136,11 @@ test = [
"pytest-asyncio>=1.2.0",
"pre-commit",
"ruff",
"ty",
]
# Type-checking/lint extras
lint = [
"ty>=0.0.1a30",
]
evaluation = [

View file

@ -8,13 +8,12 @@
"rag_storage",
"documents",
"inputs",
"lightrag/tools/lightrag_visualizer",
"lightrag/tools/*",
"lightrag/kg",
"lightrag/llm",
"lightrag/evaluation"
],
"reportMissingImports": "none",
"reportMissingModuleSource": "none",
"reportMissingTypeStubs": "none"
"reportMissingImports": "error",
"reportMissingModuleSource": "error",
"reportMissingTypeStubs": "error"
}

View file

@ -3,12 +3,14 @@ import glob
import json
import os
from lightrag.utils import logger
def extract_unique_contexts(input_directory, output_directory):
os.makedirs(output_directory, exist_ok=True)
jsonl_files = glob.glob(os.path.join(input_directory, '*.jsonl'))
print(f'Found {len(jsonl_files)} JSONL files.')
logger.info(f'Found {len(jsonl_files)} JSONL files.')
for file_path in jsonl_files:
filename = os.path.basename(file_path)
@ -18,7 +20,7 @@ def extract_unique_contexts(input_directory, output_directory):
unique_contexts_dict = {}
print(f'Processing file: {filename}')
logger.info(f'Processing file: {filename}')
try:
with open(file_path, encoding='utf-8') as infile:
@ -32,25 +34,25 @@ def extract_unique_contexts(input_directory, output_directory):
if context and context not in unique_contexts_dict:
unique_contexts_dict[context] = None
except json.JSONDecodeError as e:
print(f'JSON decoding error in file {filename} at line {line_number}: {e}')
logger.error(f'JSON decoding error in file {filename} at line {line_number}: {e}')
except FileNotFoundError:
print(f'File not found: {filename}')
logger.error(f'File not found: {filename}')
continue
except Exception as e:
print(f'An error occurred while processing file {filename}: {e}')
logger.error(f'An error occurred while processing file {filename}: {e}')
continue
unique_contexts_list = list(unique_contexts_dict.keys())
print(f'There are {len(unique_contexts_list)} unique `context` entries in the file {filename}.')
logger.info(f'There are {len(unique_contexts_list)} unique `context` entries in the file {filename}.')
try:
with open(output_path, 'w', encoding='utf-8') as outfile:
json.dump(unique_contexts_list, outfile, ensure_ascii=False, indent=4)
print(f'Unique `context` entries have been saved to: {output_filename}')
logger.info(f'Unique `context` entries have been saved to: {output_filename}')
except Exception as e:
print(f'An error occurred while saving to the file {output_filename}: {e}')
logger.error(f'An error occurred while saving to the file {output_filename}: {e}')
print('All files have been processed.')
logger.info('All files have been processed.')
if __name__ == '__main__':

View file

@ -2,25 +2,37 @@ import json
import re
from lightrag import LightRAG, QueryParam
from lightrag.utils import always_get_an_event_loop
from lightrag.utils import always_get_an_event_loop, logger
def extract_queries(file_path):
with open(file_path) as f:
data = f.read()
try:
logger.info(f'Reading queries from {file_path}')
with open(file_path, encoding='utf-8') as f:
data = f.read()
except FileNotFoundError:
logger.error(f'File not found: {file_path}')
return []
except OSError as exc:
logger.error(f'Error reading file {file_path}: {exc}')
return []
data = data.replace('**', '')
queries = re.findall(r'- Question \d+: (.+)', data)
if not queries:
logger.warning(f'No queries found in {file_path}; unexpected format?')
logger.info(f'Extracted {len(queries)} queries')
return queries
async def process_query(query_text, rag_instance, query_param):
try:
logger.debug(f'Processing query: {query_text[:100]}...')
result = await rag_instance.aquery(query_text, param=query_param)
return {'query': query_text, 'result': result}, None
except Exception as e:
logger.error(f'Error processing query: {e}', exc_info=True)
return None, {'query': query_text, 'error': str(e)}
@ -35,7 +47,19 @@ def run_queries_and_save_to_json(queries, rag_instance, query_param, output_file
first_entry = True
for query_text in queries:
result, error = loop.run_until_complete(process_query(query_text, rag_instance, query_param))
try:
result, error = loop.run_until_complete(process_query(query_text, rag_instance, query_param))
except RuntimeError as e:
if 'attached to a different loop' in str(e):
logger.error(f'Event loop mismatch while processing query: {e}')
error = {'query': query_text, 'error': f'Event loop error: {e}'}
result = None
else:
raise
except Exception as e:
logger.error(f'Unexpected error running query: {e}', exc_info=True)
error = {'query': query_text, 'error': f'Unexpected error: {e}'}
result = None
if result:
if not first_entry:

View file

@ -1,28 +1,45 @@
import json
import logging
import os
import re
from pathlib import Path
import jsonlines
from openai import OpenAI
logger = logging.getLogger(__name__)
def batch_eval(query_file, result1_file, result2_file, output_file_path):
client = OpenAI()
with open(query_file) as f:
data = f.read()
def batch_eval(query_file, result1_file, result2_file, output_file_path, client: OpenAI | None = None):
client = client or OpenAI()
for path in (query_file, result1_file, result2_file):
if not Path(path).is_file():
raise FileNotFoundError(f'Input file not found: {path}')
try:
with open(query_file, encoding='utf-8') as f:
data = f.read()
except Exception as exc:
logger.error(f'Failed to read query file {query_file}: {exc}')
raise
queries = re.findall(r'- Question \d+: (.+)', data)
with open(result1_file) as f:
answers1 = json.load(f)
answers1 = [i['result'] for i in answers1]
try:
with open(result1_file, encoding='utf-8') as f:
answers1 = json.load(f)
with open(result2_file, encoding='utf-8') as f:
answers2 = json.load(f)
except Exception as exc:
logger.error(f'Failed to load result files: {exc}')
raise
with open(result2_file) as f:
answers2 = json.load(f)
answers1 = [i['result'] for i in answers1]
answers2 = [i['result'] for i in answers2]
requests = []
for i, (query, answer1, answer2) in enumerate(zip(queries, answers1, answers2, strict=False)):
for i, (query, answer1, answer2) in enumerate(zip(queries, answers1, answers2, strict=True)):
sys_prompt = """
---Role---
You are an expert tasked with evaluating two answers to the same question based on three criteria: **Comprehensiveness**, **Diversity**, and **Empowerment**.
@ -87,34 +104,41 @@ def batch_eval(query_file, result1_file, result2_file, output_file_path):
requests.append(request_data)
output_dir = Path(output_file_path).parent
output_dir.mkdir(parents=True, exist_ok=True)
with jsonlines.open(output_file_path, mode='w') as writer:
for request in requests:
writer.write(request)
print(f'Batch API requests written to {output_file_path}')
logger.info(f'Batch API requests written to {output_file_path}')
with open(output_file_path, 'rb') as f:
batch_input_file = client.files.create(file=f, purpose='batch')
batch_input_file_id = batch_input_file.id
try:
with open(output_file_path, 'rb') as f:
batch_input_file = client.files.create(file=f, purpose='batch')
batch_input_file_id = batch_input_file.id
batch = client.batches.create(
input_file_id=batch_input_file_id,
endpoint='/v1/chat/completions',
completion_window='24h',
metadata={'description': 'nightly eval job'},
)
batch = client.batches.create(
input_file_id=batch_input_file_id,
endpoint='/v1/chat/completions',
completion_window='24h',
metadata={'description': 'nightly eval job'},
)
except Exception as exc:
logger.error(f'Error creating batch from {output_file_path}: {exc}')
raise
print(f'Batch {batch.id} has been created.')
logger.info(f'Batch {batch.id} has been created.')
if __name__ == '__main__':
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('--query_file', type=str, required=True)
parser.add_argument('--result1_file', type=str, required=True)
parser.add_argument('--result2_file', type=str, required=True)
parser.add_argument('--output_file_path', type=str, required=True)
parser.add_argument('--query_file', type=str, required=True, help='Path to file containing evaluation queries')
parser.add_argument('--result1_file', type=str, required=True, help='Path to JSON file with first set of answers')
parser.add_argument('--result2_file', type=str, required=True, help='Path to JSON file with second set of answers')
parser.add_argument('--output_file_path', type=str, required=True, help='Output path for batch API requests file')
args = parser.parse_args()
batch_eval(args.query_file, args.result1_file, args.result2_file, args.output_file_path)

View file

@ -0,0 +1,365 @@
"""
Accuracy tests for optimized prompts.
Validates that optimized prompts produce correct, parseable outputs.
Run with: uv run --extra test python tests/test_prompt_accuracy.py
"""
from __future__ import annotations
import asyncio
import json
import sys
from dataclasses import dataclass
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent))
from lightrag.prompt import PROMPTS
# =============================================================================
# Test Data
# =============================================================================
KEYWORD_TEST_QUERIES = [
{
"query": "What are the main causes of climate change and how do they affect polar ice caps?",
"expected_high": ["climate change", "causes", "effects"],
"expected_low": ["polar ice caps", "greenhouse"],
},
{
"query": "How did Apple's iPhone sales compare to Samsung Galaxy in Q3 2024?",
"expected_high": ["sales comparison", "smartphone"],
"expected_low": ["Apple", "iPhone", "Samsung", "Galaxy", "Q3 2024"],
},
{
"query": "hello", # Trivial - should return empty
"expected_high": [],
"expected_low": [],
},
]
ORPHAN_TEST_CASES = [
{
"orphan": {"name": "Pfizer", "type": "organization", "desc": "Pharmaceutical company that developed COVID-19 vaccine"},
"candidate": {"name": "Moderna", "type": "organization", "desc": "Biotechnology company that developed mRNA COVID-19 vaccine"},
"should_connect": True,
"reason": "Both are COVID-19 vaccine developers",
},
{
"orphan": {"name": "Mount Everest", "type": "location", "desc": "Highest mountain in the world, located in the Himalayas"},
"candidate": {"name": "Python Programming", "type": "concept", "desc": "Popular programming language used for data science"},
"should_connect": False,
"reason": "No logical connection between mountain and programming language",
},
]
SUMMARIZATION_TEST_CASES = [
{
"name": "Albert Einstein",
"type": "Entity",
"descriptions": [
'{"description": "Albert Einstein was a German-born theoretical physicist."}',
'{"description": "Einstein developed the theory of relativity and won the Nobel Prize in Physics in 1921."}',
'{"description": "He is widely regarded as one of the most influential scientists of the 20th century."}',
],
"must_contain": ["physicist", "relativity", "Nobel Prize", "influential"],
},
]
RAG_TEST_CASES = [
{
"query": "What is the capital of France?",
"context": "Paris is the capital and largest city of France. It has a population of over 2 million people.",
"must_contain": ["Paris"],
"must_not_contain": ["[1]", "[2]", "References"],
},
]
# =============================================================================
# Helper Functions
# =============================================================================
async def call_llm(prompt: str, model: str = "gpt-4o-mini") -> str:
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: Full prompt text; it is sent as the only message.
        model: Name of the chat-completions model to query.

    Returns:
        The text of the first choice. An empty string is returned when the
        response carries no text (``message.content`` is ``None``), so callers
        can always treat the result as ``str``.
    """
    # Imported inside the function, so importing this module does not need openai.
    import openai

    client = openai.AsyncOpenAI()
    response = await client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=0.0,
    )
    # `content` may be None; callers call .strip() and slice the result, so
    # normalise to an empty string instead of leaking None.
    return response.choices[0].message.content or ""
@dataclass
class TestResult:
    """Outcome of a single prompt check, as reported by ``main``."""

    # The class name starts with "Test" and lives in a test module, so pytest
    # would try to collect it as a test class and warn about its generated
    # __init__. This is a plain class attribute (no annotation), so it does
    # not become a dataclass field.
    __test__ = False

    name: str  # short label shown next to the PASS/FAIL status
    passed: bool  # True when the check succeeded
    details: str  # one-line explanation of the verdict
    raw_output: str = ""  # (truncated) model output, printed for failures
# =============================================================================
# Test Functions
# =============================================================================
async def test_keywords_extraction() -> list[TestResult]:
    """Check that the keywords-extraction prompt yields parseable, sensible JSON.

    For every entry of ``KEYWORD_TEST_QUERIES`` the prompt is rendered, sent to
    the LLM, and the reply is validated:

    * it must be a JSON object whose ``high_level_keywords`` and
      ``low_level_keywords`` values are lists;
    * a case that expects no keywords passes only when both lists are empty;
    * any other case passes when at least one expected keyword occurs
      (case-insensitive substring match) among the returned keywords.

    Returns:
        One ``TestResult`` per query, in input order.
    """
    results = []
    examples = "\n".join(PROMPTS["keywords_extraction_examples"])

    for case in KEYWORD_TEST_QUERIES:
        prompt = PROMPTS["keywords_extraction"].format(examples=examples, query=case["query"])
        output = await call_llm(prompt)

        try:
            # Strip an optional markdown code fence (```json ... ```).
            clean = output.strip()
            if clean.startswith("```"):
                clean = clean.split("```")[1]
                if clean.startswith("json"):
                    clean = clean[4:]
            parsed = json.loads(clean)
        except json.JSONDecodeError as e:
            passed = False
            details = f"JSON parse error: {e}"
        else:
            if not isinstance(parsed, dict):
                # Valid JSON but not an object (e.g. a bare list): report it as
                # a failed check instead of crashing on `.get`.
                passed = False
                details = f"Expected a JSON object, got {type(parsed).__name__}"
            else:
                has_high = "high_level_keywords" in parsed
                has_low = "low_level_keywords" in parsed
                is_list_high = isinstance(parsed.get("high_level_keywords"), list)
                is_list_low = isinstance(parsed.get("low_level_keywords"), list)

                if not (has_high and has_low and is_list_high and is_list_low):
                    passed = False
                    details = f"Missing keys or wrong types: has_high={has_high}, has_low={has_low}"
                elif case["expected_high"] == [] and case["expected_low"] == []:
                    # Trivial query: both lists must come back empty.
                    passed = not parsed["high_level_keywords"] and not parsed["low_level_keywords"]
                    details = "Empty lists returned for trivial query" if passed else f"Non-empty for trivial: {parsed}"
                else:
                    # str() guards against non-string list items in the reply.
                    high_lower = [str(k).lower() for k in parsed["high_level_keywords"]]
                    low_lower = [str(k).lower() for k in parsed["low_level_keywords"]]
                    all_keywords = " ".join(high_lower + low_lower)

                    found_high = sum(1 for exp in case["expected_high"] if exp.lower() in all_keywords)
                    found_low = sum(1 for exp in case["expected_low"] if exp.lower() in all_keywords)

                    passed = found_high > 0 or found_low > 0
                    details = f"Found {found_high}/{len(case['expected_high'])} high, {found_low}/{len(case['expected_low'])} low"

        results.append(TestResult(
            name=f"Keywords: {case['query'][:40]}...",
            passed=passed,
            details=details,
            raw_output=output[:200],
        ))

    return results
async def test_orphan_validation() -> list[TestResult]:
    """Check the orphan-connection validation prompt against known answers.

    For every entry of ``ORPHAN_TEST_CASES`` the prompt is rendered with a
    fixed similarity score of 0.85 and sent to the LLM. The reply must be a
    JSON object with ``should_connect``, ``confidence`` and ``reasoning``.
    A case passes when the decision equals the expected one and the confidence
    is valid: either a number in ``[0, 1]`` or one of the discrete labels
    ``HIGH`` / ``MEDIUM`` / ``LOW`` / ``NONE`` (any letter case).

    Returns:
        One ``TestResult`` per case, in input order.
    """
    confidence_labels = {"HIGH", "MEDIUM", "LOW", "NONE"}
    results = []

    for case in ORPHAN_TEST_CASES:
        prompt = PROMPTS["orphan_connection_validation"].format(
            orphan_name=case["orphan"]["name"],
            orphan_type=case["orphan"]["type"],
            orphan_description=case["orphan"]["desc"],
            candidate_name=case["candidate"]["name"],
            candidate_type=case["candidate"]["type"],
            candidate_description=case["candidate"]["desc"],
            similarity_score=0.85,
        )
        output = await call_llm(prompt)

        try:
            # Strip an optional markdown code fence (```json ... ```).
            clean = output.strip()
            if clean.startswith("```"):
                clean = clean.split("```")[1]
                if clean.startswith("json"):
                    clean = clean[4:]
            parsed = json.loads(clean)
        except json.JSONDecodeError as e:
            passed = False
            details = f"JSON parse error: {e}"
        else:
            if not isinstance(parsed, dict):
                # Valid JSON that is not an object is a failed check, not a crash.
                passed = False
                details = f"Expected a JSON object, got {type(parsed).__name__}"
            else:
                has_should_connect = "should_connect" in parsed
                has_confidence = "confidence" in parsed
                has_reasoning = "reasoning" in parsed

                if has_should_connect and has_confidence and has_reasoning:
                    correct_decision = parsed["should_connect"] == case["should_connect"]

                    # The confidence may be numeric or a discrete label; comparing
                    # or float-formatting a string would raise.
                    confidence = parsed["confidence"]
                    if isinstance(confidence, str):
                        valid_confidence = confidence.upper() in confidence_labels
                        shown_confidence = confidence
                    elif isinstance(confidence, (int, float)):
                        valid_confidence = 0.0 <= confidence <= 1.0
                        shown_confidence = f"{confidence:.2f}"
                    else:
                        valid_confidence = False
                        shown_confidence = repr(confidence)

                    passed = correct_decision and valid_confidence
                    details = f"Decision: {parsed['should_connect']} (expected {case['should_connect']}), confidence: {shown_confidence}"
                else:
                    passed = False
                    details = f"Missing keys: should_connect={has_should_connect}, confidence={has_confidence}, reasoning={has_reasoning}"

        results.append(TestResult(
            # Separator added so the two entity names do not run together.
            name=f"Orphan: {case['orphan']['name']} -> {case['candidate']['name']}",
            passed=passed,
            details=details,
            raw_output=output[:200],
        ))

    return results
async def test_entity_summarization() -> list[TestResult]:
    """Check that entity summaries keep the key facts of their inputs.

    For every entry of ``SUMMARIZATION_TEST_CASES`` the summarization prompt is
    rendered and sent to the LLM. A case passes when the reply

    * contains at least half of the ``must_contain`` terms (rounded up,
      case-insensitive),
    * is longer than 50 characters after stripping, and
    * mentions the entity name (case-insensitive).

    Returns:
        One ``TestResult`` per case, in input order.
    """
    results = []

    for case in SUMMARIZATION_TEST_CASES:
        prompt = PROMPTS["summarize_entity_descriptions"].format(
            description_name=case["name"],
            description_type=case["type"],
            description_list="\n".join(case["descriptions"]),
            summary_length=200,
            language="English",
        )
        output = await call_llm(prompt)
        output_lower = output.lower()

        required_terms = case["must_contain"]
        found = [term for term in required_terms if term.lower() in output_lower]
        missing = [term for term in required_terms if term.lower() not in output_lower]

        # Round the "half" threshold up: with floor division a case with a
        # single required term would need zero matches and could never fail
        # on content.
        min_found = (len(required_terms) + 1) // 2

        has_content = len(output.strip()) > 50
        mentions_entity = case["name"].lower() in output_lower

        passed = len(found) >= min_found and has_content and mentions_entity
        details = f"Found {len(found)}/{len(required_terms)} terms, mentions entity: {mentions_entity}"
        if missing:
            details += f", missing: {missing}"

        results.append(TestResult(
            name=f"Summarize: {case['name']}",
            passed=passed,
            details=details,
            raw_output=output[:200],
        ))

    return results
async def test_naive_rag_response() -> list[TestResult]:
    """Check the naive RAG response prompt.

    Each entry of ``RAG_TEST_CASES`` is answered by the LLM as a concise
    paragraph. A case passes when every ``must_contain`` term occurs in the
    reply (case-insensitive) and no ``must_not_contain`` term occurs in it
    (case-sensitive).

    Returns:
        One ``TestResult`` per case, in input order.
    """
    outcomes = []

    for case in RAG_TEST_CASES:
        reply = await call_llm(
            PROMPTS["naive_rag_response"].format(
                response_type="concise paragraph",
                user_prompt=case["query"],
                content_data=case["context"],
            )
        )
        reply_lower = reply.lower()

        required = case["must_contain"]
        hit_count = sum(1 for term in required if term.lower() in reply_lower)

        # Forbidden markers (citation artefacts) are matched verbatim.
        violations = [term for term in case["must_not_contain"] if term in reply]

        summary = f"Found {hit_count}/{len(required)} required terms"
        if violations:
            summary += f", VIOLATIONS: {violations}"

        outcomes.append(
            TestResult(
                name=f"RAG: {case['query'][:40]}",
                passed=hit_count == len(required) and not violations,
                details=summary,
                raw_output=reply[:200],
            )
        )

    return outcomes
# =============================================================================
# Main
# =============================================================================
async def main() -> None:
    """Run every accuracy check concurrently and print a report.

    Exits the process with status 1 when at least one check failed.
    """
    heavy_rule = "=" * 70
    light_rule = "-" * 70

    print("\n" + heavy_rule)
    print(" PROMPT ACCURACY TESTS")
    print(heavy_rule)

    # The four suites are independent, so they are awaited together.
    print("\nRunning tests...")
    suites = await asyncio.gather(
        test_keywords_extraction(),
        test_orphan_validation(),
        test_entity_summarization(),
        test_naive_rag_response(),
    )
    all_results = [result for suite in suites for result in suite]

    # Per-check report
    print("\n" + light_rule)
    print(" RESULTS")
    print(light_rule)

    for result in all_results:
        if result.passed:
            status = "✓ PASS"
        else:
            status = "✗ FAIL"
        print(f"\n{status}: {result.name}")
        print(f" {result.details}")
        if not result.passed:
            print(f" Output: {result.raw_output}...")

    passed = sum(1 for result in all_results if result.passed)
    failed = len(all_results) - passed

    # Summary
    print("\n" + heavy_rule)
    print(f" SUMMARY: {passed}/{passed + failed} tests passed")
    print(heavy_rule)

    if failed == 0:
        print("\n✓ All prompts producing correct outputs!")
        return

    print("\n⚠️ Some tests failed - review prompt changes")
    sys.exit(1)
# Script entry point: only runs when the file is executed directly, not on import.
if __name__ == "__main__":
    asyncio.run(main())

View file

@ -0,0 +1,672 @@
"""
Deep Quality Analysis for Optimized Prompts.
Tests prompts on challenging, diverse inputs to identify weaknesses.
Run with: uv run --extra test python tests/test_prompt_quality_deep.py
"""
from __future__ import annotations
import asyncio
import json
import sys
from dataclasses import dataclass, field
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent))
from lightrag.prompt import PROMPTS
# =============================================================================
# Test Data: Entity Extraction (8 Domains)
# =============================================================================
ENTITY_TEST_TEXTS = {
"technical": {
"name": "Technical/API Documentation",
"text": """
The FastAPI framework provides automatic OpenAPI documentation via Swagger UI at /docs.
To enable OAuth2 authentication, use the OAuth2PasswordBearer class from fastapi.security.
The dependency injection system allows you to declare dependencies using Depends().
Response models are validated using Pydantic's BaseModel class. For async database
operations, SQLAlchemy 2.0 with asyncpg driver is recommended. Rate limiting can be
implemented using slowapi middleware with Redis as the backend store.
""",
"expected_entities": ["FastAPI", "OpenAPI", "Swagger UI", "OAuth2", "Pydantic", "SQLAlchemy", "Redis"],
"expected_relations": ["FastAPI provides OpenAPI", "OAuth2 for authentication", "Pydantic validates responses"],
},
"legal": {
"name": "Legal/Contract",
"text": """
WHEREAS, Acme Corporation ("Licensor") owns certain intellectual property rights in
the Software known as "DataSync Pro"; and WHEREAS, Beta Technologies Inc. ("Licensee")
desires to obtain a non-exclusive license to use said Software; NOW THEREFORE, in
consideration of the mutual covenants herein, the parties agree: Licensee shall pay
Licensor a royalty of 5% of gross revenues derived from Software usage. This Agreement
shall be governed by the laws of the State of Delaware. Any disputes shall be resolved
through binding arbitration administered by JAMS in San Francisco, California.
""",
"expected_entities": ["Acme Corporation", "Beta Technologies Inc", "DataSync Pro", "Delaware", "JAMS", "San Francisco"],
"expected_relations": ["Licensor owns Software", "Licensee pays royalty", "Delaware governs agreement"],
},
"scientific": {
"name": "Scientific/Research Abstract",
"text": """
We investigated the effects of CRISPR-Cas9 gene editing on tumor suppressor p53 expression
in HeLa cell lines. Using Western blot analysis and qPCR, we observed a 73% reduction in
p53 protein levels after 48 hours of transfection. Control groups treated with scrambled
sgRNA showed no significant change (p > 0.05). Our findings suggest that targeted p53
knockout can be achieved with high efficiency, supporting further research into cancer
immunotherapy applications. Funding was provided by NIH grant R01-CA123456.
""",
"expected_entities": ["CRISPR-Cas9", "p53", "HeLa", "Western blot", "qPCR", "NIH"],
"expected_relations": ["CRISPR-Cas9 edits p53", "Western blot measures protein", "NIH funds research"],
},
"news": {
"name": "News/Current Events",
"text": """
BREAKING: Tesla CEO Elon Musk announced today that the company will invest $5 billion
in a new Gigafactory in Austin, Texas, creating an estimated 10,000 jobs by 2025.
The announcement came during Tesla's Q3 earnings call, where the company reported
record revenue of $25.2 billion. Analysts at Goldman Sachs raised their price target
to $300, citing strong demand for the Model Y in European markets. Shares rose 8%
in after-hours trading on the NASDAQ exchange.
""",
"expected_entities": ["Tesla", "Elon Musk", "Austin", "Texas", "Gigafactory", "Goldman Sachs", "Model Y", "NASDAQ"],
"expected_relations": ["Musk leads Tesla", "Tesla invests in Gigafactory", "Goldman Sachs analyzes Tesla"],
},
"conversational": {
"name": "Conversational/Interview",
"text": """
Interviewer: Dr. Chen, your research on quantum computing has gained significant attention.
Can you explain the breakthrough?
Dr. Chen: Certainly. Our team at MIT developed a new error correction method using topological
qubits. Unlike traditional approaches by IBM or Google, we achieved 99.9% fidelity at room
temperature. My colleague Dr. Sarah Martinez deserves much of the credit - her algorithm
made this possible.
Interviewer: What are the practical applications?
Dr. Chen: Drug discovery is the immediate target. Pfizer has already licensed our technology
for protein folding simulations.
""",
"expected_entities": ["Dr. Chen", "MIT", "IBM", "Google", "Dr. Sarah Martinez", "Pfizer"],
"expected_relations": ["Dr. Chen works at MIT", "Martinez developed algorithm", "Pfizer licenses technology"],
},
# New domains for expanded coverage
"medical": {
"name": "Medical/Healthcare",
"text": """
The patient presented with Type 2 diabetes mellitus complicated by diabetic retinopathy.
Treatment included metformin 500mg twice daily and monthly intravitreal injections of
Avastin (bevacizumab). Dr. Sarah Chen at Johns Hopkins recommended adding Jardiance
(empagliflozin) given the patient's elevated HbA1c of 8.2%. Insurance coverage through
Aetna required prior authorization.
""",
"expected_entities": ["Type 2 diabetes", "diabetic retinopathy", "metformin", "Avastin",
"Dr. Sarah Chen", "Johns Hopkins", "Jardiance", "Aetna"],
"expected_relations": ["metformin treats diabetes", "Avastin treats retinopathy", "Aetna covers insurance"],
},
"financial": {
"name": "Financial/Trading",
"text": """
The S&P 500 index futures dropped 2.3% following hawkish Fed minutes. BlackRock's
iShares ETF (SPY) saw record outflows of $4.2 billion. Morgan Stanley upgraded
NVIDIA to Overweight with a $500 price target, citing AI datacenter demand.
Bitcoin fell below $40,000 as Coinbase reported a 30% decline in trading volume.
""",
"expected_entities": ["S&P 500", "BlackRock", "SPY", "Morgan Stanley", "NVIDIA",
"Bitcoin", "Coinbase"],
"expected_relations": ["BlackRock manages SPY", "Morgan Stanley analyzes NVIDIA", "Coinbase trades Bitcoin"],
},
"social_media": {
"name": "Social Media/Informal",
"text": """
OMG just saw @elonmusk tweet about #Dogecoin again! Price pumped 15% in an hour!
Meanwhile TikTok is banning crypto content and YouTube demonetized half the crypto
channels. My Discord server is going crazy. Even r/wallstreetbets is talking about it.
""",
"expected_entities": ["Elon Musk", "Dogecoin", "TikTok", "YouTube", "Discord",
"r/wallstreetbets"],
"expected_relations": ["Musk tweets about Dogecoin", "TikTok bans crypto", "YouTube demonetizes channels"],
},
}
# =============================================================================
# Test Data: Keywords Extraction (Query Types)
# =============================================================================
KEYWORD_TEST_QUERIES = {
"factual": {
"query": "What is the capital of France?",
"expected_high": ["geography", "capital city"],
"expected_low": ["France", "capital"],
},
"analytical": {
"query": "Why does inflation cause interest rates to rise?",
"expected_high": ["economics", "monetary policy", "cause-effect"],
"expected_low": ["inflation", "interest rates"],
},
"comparison": {
"query": "How does Python compare to JavaScript for web development?",
"expected_high": ["programming languages", "comparison", "web development"],
"expected_low": ["Python", "JavaScript"],
},
"procedural": {
"query": "How to deploy a Docker container to AWS ECS?",
"expected_high": ["deployment", "cloud computing", "containerization"],
"expected_low": ["Docker", "AWS", "ECS", "container"],
},
"multi_topic": {
"query": "What is machine learning and how does it relate to artificial intelligence?",
"expected_high": ["machine learning", "artificial intelligence", "technology"],
"expected_low": ["machine learning", "artificial intelligence"],
},
# New query types for expanded coverage
"negation": {
"query": "What programming languages are NOT object-oriented?",
"expected_high": ["programming languages", "paradigms"],
"expected_low": ["object-oriented", "programming"],
},
"implicit": {
"query": "Tell me about climate",
"expected_high": ["climate", "environment"],
"expected_low": ["climate"],
},
"multi_hop": {
"query": "Who is the CEO of the company that makes the iPhone?",
"expected_high": ["business", "leadership", "technology"],
"expected_low": ["CEO", "iPhone"],
},
"ambiguous": {
"query": "What is Java used for?",
"expected_high": ["technology", "programming"],
"expected_low": ["Java"],
},
}
# =============================================================================
# Test Data: Orphan Validation (Edge Cases)
# =============================================================================
ORPHAN_TEST_CASES = [
# Same domain, directly related
{
"orphan": {"name": "Python", "type": "concept", "desc": "Programming language known for simplicity"},
"candidate": {"name": "Django", "type": "concept", "desc": "Web framework written in Python"},
"expected": True,
"difficulty": "easy",
"category": "same_domain_direct",
},
# Same domain, tangentially related
{
"orphan": {"name": "Bitcoin", "type": "concept", "desc": "Decentralized cryptocurrency"},
"candidate": {"name": "Visa", "type": "organization", "desc": "Payment processing company"},
"expected": True, # Both are payment/financial
"difficulty": "medium",
"category": "same_domain_tangential",
},
# Same domain, unrelated
{
"orphan": {"name": "Photosynthesis", "type": "concept", "desc": "Process plants use to convert sunlight to energy"},
"candidate": {"name": "Mitosis", "type": "concept", "desc": "Cell division process"},
"expected": False, # Both biology but not directly related
"difficulty": "medium",
"category": "same_domain_unrelated",
},
# Different domains, surprisingly related
{
"orphan": {"name": "Netflix", "type": "organization", "desc": "Streaming entertainment company"},
"candidate": {"name": "AWS", "type": "organization", "desc": "Amazon's cloud computing platform"},
"expected": True, # Netflix runs on AWS
"difficulty": "hard",
"category": "cross_domain_related",
},
# False positive trap (high similarity, no logic)
{
"orphan": {"name": "Java", "type": "concept", "desc": "Programming language developed by Sun Microsystems"},
"candidate": {"name": "Java", "type": "location", "desc": "Indonesian island known for coffee production"},
"expected": False, # Same name, completely different things
"difficulty": "hard",
"category": "false_positive_trap",
},
# Temporal relationship
{
"orphan": {"name": "Windows 11", "type": "product", "desc": "Microsoft operating system released in 2021"},
"candidate": {"name": "Windows 10", "type": "product", "desc": "Microsoft operating system released in 2015"},
"expected": True, # Successor relationship
"difficulty": "easy",
"category": "temporal",
},
# Causal relationship
{
"orphan": {"name": "COVID-19 Pandemic", "type": "event", "desc": "Global health crisis starting in 2020"},
"candidate": {"name": "Remote Work", "type": "concept", "desc": "Working from home or outside traditional office"},
"expected": True, # Pandemic caused remote work surge
"difficulty": "medium",
"category": "causal",
},
# Hierarchical
{
"orphan": {"name": "Toyota Camry", "type": "product", "desc": "Mid-size sedan manufactured by Toyota"},
"candidate": {"name": "Toyota", "type": "organization", "desc": "Japanese automotive manufacturer"},
"expected": True, # Part-of relationship
"difficulty": "easy",
"category": "hierarchical",
},
# Competitor relationship
{
"orphan": {"name": "Uber", "type": "organization", "desc": "Ride-sharing company"},
"candidate": {"name": "Lyft", "type": "organization", "desc": "Ride-sharing company"},
"expected": True, # Direct competitors
"difficulty": "easy",
"category": "competitor",
},
# No relationship
{
"orphan": {"name": "Beethoven", "type": "person", "desc": "Classical music composer from 18th century"},
"candidate": {"name": "Kubernetes", "type": "concept", "desc": "Container orchestration platform"},
"expected": False, # Completely unrelated
"difficulty": "easy",
"category": "no_relationship",
},
# New edge cases for expanded coverage
# Subsidiary relationship (ownership)
{
"orphan": {"name": "YouTube", "type": "organization", "desc": "Video sharing platform"},
"candidate": {"name": "Google", "type": "organization", "desc": "Technology company and search engine"},
"expected": True, # Google owns YouTube
"difficulty": "medium",
"category": "subsidiary",
},
# Scientific taxonomy (a specific virus vs. the virus family it belongs to)
{
"orphan": {"name": "SARS-CoV-2", "type": "concept", "desc": "Coronavirus that causes COVID-19"},
"candidate": {"name": "Coronavirus", "type": "concept", "desc": "Family of viruses affecting respiratory system"},
"expected": True, # SARS-CoV-2 is a type of coronavirus
"difficulty": "medium",
"category": "scientific_alias",
},
# Pseudonym (same person, different names)
{
"orphan": {"name": "Mark Twain", "type": "person", "desc": "American author of Tom Sawyer and Huckleberry Finn"},
"candidate": {"name": "Samuel Clemens", "type": "person", "desc": "American writer born in Missouri in 1835"},
"expected": True, # Same person (pen name)
"difficulty": "hard",
"category": "pseudonym",
},
# Weak false positive (similar names/categories but unrelated)
{
"orphan": {"name": "Mount Everest", "type": "location", "desc": "Highest mountain in the Himalayas"},
"candidate": {"name": "Mount Rushmore", "type": "location", "desc": "Memorial carved into mountain in South Dakota"},
"expected": False, # Both "Mount" but no logical connection
"difficulty": "medium",
"category": "weak_false_positive",
},
# Spurious correlation (statistically correlated but no causal link)
{
"orphan": {"name": "Ice Cream Sales", "type": "concept", "desc": "Consumer purchases of frozen dessert"},
"candidate": {"name": "Drowning Deaths", "type": "concept", "desc": "Fatalities from submersion in water"},
"expected": False, # Both increase in summer but no causal relationship
"difficulty": "hard",
"category": "spurious_correlation",
},
]
# =============================================================================
# Helper Functions
# =============================================================================
async def call_llm(prompt: str, model: str = "gpt-4o-mini") -> str:
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: Full prompt text; it is sent as the only message.
        model: Name of the chat-completions model to query.

    Returns:
        The text of the first choice, or an empty string when the response
        carries no text (``message.content`` is ``None``).
    """
    # Imported inside the function, so importing this module does not need openai.
    import openai

    client = openai.AsyncOpenAI()
    response = await client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=0.0,
    )
    # `content` may be None; callers parse and strip the result, so normalise
    # to an empty string instead of leaking None.
    return response.choices[0].message.content or ""
def format_entity_prompt(text: str) -> str:
    """Render the entity-extraction prompt for ``text``.

    The result is the system prompt (with the few-shot examples and ``text``
    filled in), a newline, and then the user prompt, all taken from ``PROMPTS``
    and using its default delimiters.
    """
    raw_examples = "\n".join(PROMPTS["entity_extraction_examples"])
    delimiters = {
        "tuple_delimiter": PROMPTS["DEFAULT_TUPLE_DELIMITER"],
        "completion_delimiter": PROMPTS["DEFAULT_COMPLETION_DELIMITER"],
    }

    # The examples are templates themselves: fill in their delimiters first.
    rendered_examples = raw_examples.format(**delimiters)

    system_part = PROMPTS["entity_extraction_system_prompt"].format(
        entity_types="person, organization, location, concept, product, event, category, method",
        language="English",
        examples=rendered_examples,
        input_text=text,
        **delimiters,
    )
    user_part = PROMPTS["entity_extraction_user_prompt"].format(
        completion_delimiter=delimiters["completion_delimiter"],
        language="English",
    )
    return system_part + "\n" + user_part
def parse_entities(
    output: str,
    *,
    tuple_delimiter: str = "<|#|>",
    completion_delimiter: str = "<|COMPLETE|>",
) -> tuple[list[dict], list[dict]]:
    """Parse delimiter-separated entity/relation records from LLM output.

    Each non-empty line is split on ``tuple_delimiter``. Lines whose first
    field is ``entity`` (at least 4 fields) or ``relation`` (at least 5
    fields), compared case-insensitively, become records; every other line is
    ignored.

    Args:
        output: Raw extraction output, one record per line.
        tuple_delimiter: Separator between the fields of a record.
        completion_delimiter: End-of-output marker; it is removed wherever it
            appears, so a record sharing its line with the marker is kept.

    Returns:
        ``(entities, relations)`` where entities are dicts with keys ``name``,
        ``type`` and ``desc`` and relations are dicts with keys ``source``,
        ``target`` and ``keywords``. All values are whitespace-stripped.
    """
    entities: list[dict] = []
    relations: list[dict] = []

    for raw_line in output.strip().split("\n"):
        # Drop the marker rather than the whole line: models often append it
        # directly to the last record.
        line = raw_line.replace(completion_delimiter, "").strip()
        if not line:
            continue

        parts = [part.strip() for part in line.split(tuple_delimiter)]
        record_kind = parts[0].lower()

        if record_kind == "entity" and len(parts) >= 4:
            entities.append({
                "name": parts[1],
                "type": parts[2],
                "desc": parts[3],
            })
        elif record_kind == "relation" and len(parts) >= 5:
            relations.append({
                "source": parts[1],
                "target": parts[2],
                "keywords": parts[3],
            })

    return entities, relations
@dataclass
class EntityResult:
    """Entity-extraction outcome for one test domain."""

    domain: str  # human-readable domain name of the test text
    entities: list[dict]  # parsed entity records
    relations: list[dict]  # parsed relation records
    expected_entities: list[str]  # entity names the extraction should contain
    issues: list[str] = field(default_factory=list)  # problems found while analysing the output
    precision_notes: str = ""  # free-form note; empty by default
    recall_notes: str = ""  # summary of how many expected entities were found
@dataclass
class KeywordResult:
    """Keywords-extraction outcome for one test query."""

    query_type: str  # key of the query in KEYWORD_TEST_QUERIES
    query: str  # the query text sent to the model
    high_keywords: list[str]  # returned high-level keywords
    low_keywords: list[str]  # returned low-level keywords
    issues: list[str] = field(default_factory=list)  # problems found while analysing the output
@dataclass
class OrphanResult:
    """Orphan-connection validation outcome for one test case."""

    category: str  # category label of the test case
    orphan: str  # name of the orphan entity
    candidate: str  # name of the candidate entity
    expected: bool  # expected connect/reject decision
    actual: bool  # decision returned by the model
    confidence: float  # confidence reported by the model, as a number
    correct: bool  # True when actual equals expected
    reasoning: str = ""  # model's explanation, or an error marker
# =============================================================================
# Test Functions
# =============================================================================
async def test_entity_extraction_deep() -> list[EntityResult]:
    """Run entity extraction on every domain in ``ENTITY_TEST_TEXTS``.

    For each domain the extraction output is parsed and checked for:

    * recall: expected entities missing from the extracted names
      (case-insensitive substring match);
    * quality: entities whose description is shorter than 20 characters;
    * connectivity: fewer than 0.5 relations per extracted entity.

    Returns:
        One ``EntityResult`` per domain, with the findings in ``issues``.
    """
    collected = []

    for sample in ENTITY_TEST_TEXTS.values():
        print(f" Testing {sample['name']}...")

        raw_output = await call_llm(format_entity_prompt(sample["text"]))
        entities, relations = parse_entities(raw_output)
        expected = sample["expected_entities"]

        outcome = EntityResult(
            domain=sample["name"],
            entities=entities,
            relations=relations,
            expected_entities=expected,
        )

        # Recall: split the expected names into found and missed.
        extracted_names = [entity["name"].lower() for entity in entities]
        found, missed = [], []
        for wanted in expected:
            needle = wanted.lower()
            bucket = found if any(needle in name for name in extracted_names) else missed
            bucket.append(wanted)

        if missed:
            outcome.issues.append(f"RECALL: Missed expected entities: {missed}")
            outcome.recall_notes = f"Found {len(found)}/{len(expected)}"
        else:
            outcome.recall_notes = f"Found all {len(found)} expected"

        # Quality: very short descriptions carry little information.
        short_description_count = sum(1 for entity in entities if len(entity["desc"]) < 20)
        if short_description_count:
            outcome.issues.append(f"QUALITY: {short_description_count} entities have very short descriptions")

        # Connectivity: relations per entity (skipped when nothing was extracted).
        if entities:
            ratio = len(relations) / len(entities)
            if ratio < 0.5:
                outcome.issues.append(f"CONNECTIVITY: Low relation/entity ratio ({ratio:.2f})")

        collected.append(outcome)

    return collected
async def test_keywords_extraction_deep() -> list[KeywordResult]:
    """Run keywords extraction on every query type in ``KEYWORD_TEST_QUERIES``.

    The reply is parsed as JSON (an optional markdown code fence is removed
    first) and checked for missing expected low-level keywords and for empty
    keyword lists. Replies that are not a JSON object are recorded as parse
    errors instead of raising.

    Returns:
        One ``KeywordResult`` per query type, with the findings in ``issues``.
    """
    results = []
    examples = "\n".join(PROMPTS["keywords_extraction_examples"])

    for query_type, data in KEYWORD_TEST_QUERIES.items():
        print(f" Testing {query_type} query...")

        prompt = PROMPTS["keywords_extraction"].format(
            examples=examples,
            query=data["query"],
        )
        output = await call_llm(prompt)

        result = KeywordResult(
            query_type=query_type,
            query=data["query"],
            high_keywords=[],
            low_keywords=[],
        )

        clean = output.strip()
        if clean.startswith("```"):
            clean = clean.split("```")[1].strip()
            # Remove only the leading language tag. Replacing every "json"
            # would also delete the word inside keyword values.
            if clean.startswith("json"):
                clean = clean[4:].strip()

        try:
            parsed = json.loads(clean)
        except json.JSONDecodeError:
            parsed = None
            result.issues.append("PARSE ERROR: Invalid JSON output")
        else:
            if not isinstance(parsed, dict):
                parsed = None
                result.issues.append("PARSE ERROR: JSON output is not an object")

        if parsed is not None:
            high = parsed.get("high_level_keywords", [])
            low = parsed.get("low_level_keywords", [])
            if not isinstance(high, list) or not isinstance(low, list):
                result.issues.append("TYPE: Keyword fields must be lists")
                high = high if isinstance(high, list) else []
                low = low if isinstance(low, list) else []

            # str() guards the join below against non-string list items.
            result.high_keywords = [str(keyword) for keyword in high]
            result.low_keywords = [str(keyword) for keyword in low]

            # Check if key concepts are captured
            all_kw = " ".join(result.high_keywords + result.low_keywords).lower()
            for exp in data["expected_low"]:
                if exp.lower() not in all_kw:
                    result.issues.append(f"MISS: Expected '{exp}' not in keywords")

            # Check for reasonable count
            if not result.high_keywords:
                result.issues.append("EMPTY: No high-level keywords")
            if not result.low_keywords and query_type != "factual":
                result.issues.append("EMPTY: No low-level keywords")

        results.append(result)

    return results
async def test_orphan_validation_deep() -> list[OrphanResult]:
    """Run orphan-connection validation on every case in ``ORPHAN_TEST_CASES``.

    Each case is rendered with a fixed similarity score of 0.75 and sent to the
    LLM. The reply is parsed as JSON (an optional markdown code fence is removed
    first). The confidence may be numeric or one of the labels
    ``HIGH`` / ``MEDIUM`` / ``LOW`` / ``NONE``; labels are mapped to numbers.
    Replies that are not a JSON object are recorded as an incorrect result
    with reasoning ``"PARSE ERROR"``.

    Returns:
        One ``OrphanResult`` per case, in input order.
    """
    confidence_map = {"HIGH": 0.95, "MEDIUM": 0.85, "LOW": 0.60, "NONE": 0.20}
    results = []

    for case in ORPHAN_TEST_CASES:
        # Separator added so the two entity names do not run together.
        print(f" Testing {case['category']}: {case['orphan']['name']} -> {case['candidate']['name']}...")

        prompt = PROMPTS["orphan_connection_validation"].format(
            orphan_name=case["orphan"]["name"],
            orphan_type=case["orphan"]["type"],
            orphan_description=case["orphan"]["desc"],
            candidate_name=case["candidate"]["name"],
            candidate_type=case["candidate"]["type"],
            candidate_description=case["candidate"]["desc"],
            similarity_score=0.75,
        )
        output = await call_llm(prompt)

        # Fields shared by the success and the failure record.
        common = {
            "category": case["category"],
            "orphan": case["orphan"]["name"],
            "candidate": case["candidate"]["name"],
            "expected": case["expected"],
        }

        clean = output.strip()
        if clean.startswith("```"):
            clean = clean.split("```")[1].strip()
            # Remove only the leading language tag. Replacing every "json"
            # would also delete the word inside the reasoning text.
            if clean.startswith("json"):
                clean = clean[4:].strip()

        try:
            parsed = json.loads(clean)
        except json.JSONDecodeError:
            parsed = None

        if not isinstance(parsed, dict):
            # Invalid JSON, or valid JSON that is not an object.
            results.append(OrphanResult(
                **common,
                actual=False,
                confidence=0.0,
                correct=False,
                reasoning="PARSE ERROR",
            ))
            continue

        actual = parsed.get("should_connect", False)

        # Handle discrete labels (HIGH/MEDIUM/LOW/NONE) or numeric
        raw_confidence = parsed.get("confidence", 0.0)
        if isinstance(raw_confidence, str):
            confidence = confidence_map.get(raw_confidence.upper(), 0.75)
        elif isinstance(raw_confidence, (int, float)):
            confidence = raw_confidence
        else:
            # e.g. JSON null: keep the field numeric so callers can format it.
            confidence = 0.0

        results.append(OrphanResult(
            **common,
            actual=actual,
            confidence=confidence,
            correct=(actual == case["expected"]),
            reasoning=parsed.get("reasoning", ""),
        ))

    return results
# =============================================================================
# Main
# =============================================================================
def _clip_text(text: str, limit: int) -> str:
    """Return *text* cut to *limit* characters, adding "..." only if cut."""
    if len(text) <= limit:
        return text
    return f"{text[:limit]}..."


def _accuracy_percent(correct: int, total: int) -> float:
    """Return correct/total as a percentage; 0.0 when *total* is zero."""
    if not total:
        return 0.0
    return 100 * correct / total


def _print_entity_report(entity_results) -> None:
    """Print per-domain entity/relation counts, recall notes and issues."""
    for r in entity_results:
        status = "✅" if not r.issues else "⚠️"
        print(f"\n{status} {r.domain}")
        print(f" Entities: {len(r.entities)} | Relations: {len(r.relations)}")
        print(f" Recall: {r.recall_notes}")
        for issue in r.issues:
            print(f"{issue}")
        # Show at most the first eight extracted entity names.
        shown = [e["name"] for e in r.entities[:8]]
        more = "..." if len(r.entities) > 8 else ""
        print(f" Extracted: {shown}{more}")


def _print_keyword_report(keyword_results) -> None:
    """Print high/low-level keywords and issues for each query."""
    for r in keyword_results:
        status = "✅" if not r.issues else "⚠️"
        print(f'\n{status} {r.query_type}: "{_clip_text(r.query, 50)}"')
        print(f" High: {r.high_keywords}")
        print(f" Low: {r.low_keywords}")
        for issue in r.issues:
            print(f"{issue}")


def _print_orphan_report(orphan_results) -> int:
    """Print the per-case orphan decisions and return the number correct."""
    correct = sum(1 for r in orphan_results if r.correct)
    total = len(orphan_results)
    print(f"\nAccuracy: {correct}/{total} ({_accuracy_percent(correct, total):.0f}%)")
    for r in orphan_results:
        status = "✅" if r.correct else "❌"
        decision = "CONNECT" if r.actual else "REJECT"
        expected = "CONNECT" if r.expected else "REJECT"
        print(f"\n{status} [{r.category}] {r.orphan} -> {r.candidate}")
        print(f" Decision: {decision} (conf={r.confidence:.2f}) | Expected: {expected}")
        if not r.correct:
            print(f" ❌ WRONG: {_clip_text(r.reasoning, 80)}")
    return correct


def _print_summary(entity_results, keyword_results, orphan_results, correct: int) -> None:
    """Print aggregate counts and threshold-based recommendations."""
    print("\n" + "=" * 70)
    print(" SUMMARY")
    print("=" * 70)

    entity_issues = sum(len(r.issues) for r in entity_results)
    keyword_issues = sum(len(r.issues) for r in keyword_results)
    orphan_accuracy = _accuracy_percent(correct, len(orphan_results))

    print(f"\nEntity Extraction: {entity_issues} issues across {len(entity_results)} domains")
    print(f"Keywords Extraction: {keyword_issues} issues across {len(keyword_results)} query types")
    print(f"Orphan Validation: {orphan_accuracy:.0f}% accuracy ({correct}/{len(orphan_results)})")

    # Recommendations
    print("\n📋 RECOMMENDATIONS:")
    recommendations = []
    if entity_issues > 3:
        recommendations.append(" • Entity extraction needs improvement (recall or description quality)")
    if keyword_issues > 2:
        recommendations.append(" • Keywords extraction needs clearer guidance")
    if orphan_accuracy < 80:
        recommendations.append(" • Orphan validation needs better examples or criteria")
    if not recommendations:
        # None of the thresholds above was crossed.
        recommendations.append(" • All prompts performing well! Minor tuning may help but not critical.")
    for line in recommendations:
        print(line)


async def main() -> None:
    """Run deep quality analysis.

    Runs the entity-extraction, keyword-extraction and orphan-validation
    suites in that order, printing a report after each one and an overall
    summary with recommendations at the end. Each section header is printed
    before its suite runs so the suite's own progress lines appear under it.
    """
    print("\n" + "=" * 70)
    print(" DEEP PROMPT QUALITY ANALYSIS")
    print("=" * 70)

    # Entity Extraction
    print(f"\n📊 ENTITY EXTRACTION ({len(ENTITY_TEST_TEXTS)} domains)")
    print("-" * 50)
    entity_results = await test_entity_extraction_deep()
    _print_entity_report(entity_results)

    # Keywords Extraction
    print(f"\n\n📊 KEYWORDS EXTRACTION ({len(KEYWORD_TEST_QUERIES)} query types)")
    print("-" * 50)
    keyword_results = await test_keywords_extraction_deep()
    _print_keyword_report(keyword_results)

    # Orphan Validation
    print(f"\n\n📊 ORPHAN VALIDATION ({len(ORPHAN_TEST_CASES)} edge cases)")
    print("-" * 50)
    orphan_results = await test_orphan_validation_deep()
    correct = _print_orphan_report(orphan_results)

    # Summary
    _print_summary(entity_results, keyword_results, orphan_results, correct)


if __name__ == "__main__":
    asyncio.run(main())

48
uv.lock generated
View file

@ -2641,6 +2641,9 @@ evaluation = [
{ name = "uvicorn" },
{ name = "xlsxwriter" },
]
lint = [
{ name = "ty" },
]
observability = [
{ name = "langfuse" },
]
@ -2725,7 +2728,6 @@ pytest = [
{ name = "pytest" },
{ name = "pytest-asyncio" },
{ name = "ruff" },
{ name = "ty" },
]
test = [
{ name = "aiofiles" },
@ -2770,7 +2772,6 @@ test = [
{ name = "setuptools" },
{ name = "tenacity" },
{ name = "tiktoken" },
{ name = "ty" },
{ name = "uvicorn" },
{ name = "xlsxwriter" },
]
@ -2858,15 +2859,14 @@ requires-dist = [
{ name = "tenacity", marker = "extra == 'api'" },
{ name = "tiktoken" },
{ name = "tiktoken", marker = "extra == 'api'" },
{ name = "ty", marker = "extra == 'pytest'" },
{ name = "ty", marker = "extra == 'test'" },
{ name = "ty", marker = "extra == 'lint'", specifier = ">=0.0.1a30" },
{ name = "uvicorn", marker = "extra == 'api'" },
{ name = "voyageai", marker = "extra == 'offline-llm'", specifier = ">=0.2.0,<1.0.0" },
{ name = "xlsxwriter", specifier = ">=3.1.0" },
{ name = "xlsxwriter", marker = "extra == 'api'", specifier = ">=3.1.0" },
{ name = "zhipuai", marker = "extra == 'offline-llm'", specifier = ">=2.0.0,<3.0.0" },
]
provides-extras = ["pytest", "api", "docling", "offline-storage", "offline-llm", "offline", "test", "evaluation", "observability"]
provides-extras = ["pytest", "api", "docling", "offline-storage", "offline-llm", "offline", "test", "lint", "evaluation", "observability"]
[[package]]
name = "llama-cloud"
@ -6580,27 +6580,27 @@ wheels = [
[[package]]
name = "ty"
version = "0.0.1a26"
version = "0.0.1a31"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/39/39/b4b4ecb6ca6d7e937fa56f0b92a8f48d7719af8fe55bdbf667638e9f93e2/ty-0.0.1a26.tar.gz", hash = "sha256:65143f8efeb2da1644821b710bf6b702a31ddcf60a639d5a576db08bded91db4", size = 4432154, upload-time = "2025-11-10T18:02:30.142Z" }
sdist = { url = "https://files.pythonhosted.org/packages/30/78/daa1e70377b8127e06db63063b7dd9694cb2bb611b4e3c2182b9ec5a02a1/ty-0.0.1a31.tar.gz", hash = "sha256:b878b04af63b1e716436897838ca6a107a672539155b6fc2051268cd85da9cd6", size = 4656004, upload-time = "2025-12-04T09:01:47.147Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/cc/6a/661833ecacc4d994f7e30a7f1307bfd3a4a91392a6b03fb6a018723e75b8/ty-0.0.1a26-py3-none-linux_armv6l.whl", hash = "sha256:09208dca99bb548e9200136d4d42618476bfe1f4d2066511f2c8e2e4dfeced5e", size = 9173869, upload-time = "2025-11-10T18:01:46.012Z" },
{ url = "https://files.pythonhosted.org/packages/66/a8/32ea50f064342de391a7267f84349287e2f1c2eb0ad4811d6110916179d6/ty-0.0.1a26-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:91d12b66c91a1b82e698a2aa73fe043a1a9da83ff0dfd60b970500bee0963b91", size = 8973420, upload-time = "2025-11-10T18:01:49.32Z" },
{ url = "https://files.pythonhosted.org/packages/d1/f6/6659d55940cd5158a6740ae46a65be84a7ee9167738033a9b1259c36eef5/ty-0.0.1a26-py3-none-macosx_11_0_arm64.whl", hash = "sha256:c5bc6dfcea5477c81ad01d6a29ebc9bfcbdb21c34664f79c9e1b84be7aa8f289", size = 8528888, upload-time = "2025-11-10T18:01:51.511Z" },
{ url = "https://files.pythonhosted.org/packages/79/c9/4cbe7295013cc412b4f100b509aaa21982c08c59764a2efa537ead049345/ty-0.0.1a26-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40e5d15635e9918924138e8d3fb1cbf80822dfb8dc36ea8f3e72df598c0c4bea", size = 8801867, upload-time = "2025-11-10T18:01:53.888Z" },
{ url = "https://files.pythonhosted.org/packages/ed/b3/25099b219a6444c4b29f175784a275510c1cd85a23a926d687ab56915027/ty-0.0.1a26-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:86dc147ed0790c7c8fd3f0d6c16c3c5135b01e99c440e89c6ca1e0e592bb6682", size = 8975519, upload-time = "2025-11-10T18:01:56.231Z" },
{ url = "https://files.pythonhosted.org/packages/73/3e/3ad570f4f592cb1d11982dd2c426c90d2aa9f3d38bf77a7e2ce8aa614302/ty-0.0.1a26-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fbe0e07c9d5e624edfc79a468f2ef191f9435581546a5bb6b92713ddc86ad4a6", size = 9331932, upload-time = "2025-11-10T18:01:58.476Z" },
{ url = "https://files.pythonhosted.org/packages/04/fa/62c72eead0302787f9cc0d613fc671107afeecdaf76ebb04db8f91bb9f7e/ty-0.0.1a26-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:0dcebbfe9f24b43d98a078f4a41321ae7b08bea40f5c27d81394b3f54e9f7fb5", size = 9921353, upload-time = "2025-11-10T18:02:00.749Z" },
{ url = "https://files.pythonhosted.org/packages/6c/1f/3b329c4b60d878704e09eb9d05467f911f188e699961c044b75932893e0a/ty-0.0.1a26-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0901b75afc7738224ffc98bbc8ea03a20f167a2a83a4b23a6550115e8b3ddbc6", size = 9700800, upload-time = "2025-11-10T18:02:03.544Z" },
{ url = "https://files.pythonhosted.org/packages/92/24/13fcba20dd86a7c3f83c814279aa3eb6a29c5f1b38a3b3a4a0fd22159189/ty-0.0.1a26-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4788f34d384c132977958d76fef7f274f8d181b22e33933c4d16cff2bb5ca3b9", size = 9728289, upload-time = "2025-11-10T18:02:06.386Z" },
{ url = "https://files.pythonhosted.org/packages/40/7a/798894ff0b948425570b969be35e672693beeb6b852815b7340bc8de1575/ty-0.0.1a26-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b98851c11c560ce63cd972ed9728aa079d9cf40483f2cdcf3626a55849bfe107", size = 9279735, upload-time = "2025-11-10T18:02:09.425Z" },
{ url = "https://files.pythonhosted.org/packages/1a/54/71261cc1b8dc7d3c4ad92a83b4d1681f5cb7ea5965ebcbc53311ae8c6424/ty-0.0.1a26-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:c20b4625a20059adecd86fe2c4df87cd6115fea28caee45d3bdcf8fb83d29510", size = 8767428, upload-time = "2025-11-10T18:02:11.956Z" },
{ url = "https://files.pythonhosted.org/packages/8e/07/b248b73a640badba2b301e6845699b7dd241f40a321b9b1bce684d440f70/ty-0.0.1a26-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:d9909e96276f8d16382d285db92ae902174cae842aa953003ec0c06642db2f8a", size = 9009170, upload-time = "2025-11-10T18:02:14.878Z" },
{ url = "https://files.pythonhosted.org/packages/f8/35/ec8353f2bb7fd2f41bca6070b29ecb58e2de9af043e649678b8c132d5439/ty-0.0.1a26-py3-none-musllinux_1_2_i686.whl", hash = "sha256:a76d649ceefe9baa9bbae97d217bee076fd8eeb2a961f66f1dff73cc70af4ac8", size = 9119215, upload-time = "2025-11-10T18:02:18.329Z" },
{ url = "https://files.pythonhosted.org/packages/70/48/db49fe1b7e66edf90dc285869043f99c12aacf7a99c36ee760e297bac6d5/ty-0.0.1a26-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:a0ee0f6366bcf70fae114e714d45335cacc8daa936037441e02998a9110b7a29", size = 9398655, upload-time = "2025-11-10T18:02:21.031Z" },
{ url = "https://files.pythonhosted.org/packages/10/f8/d869492bdbb21ae8cf4c99b02f20812bbbf49aa187cfeb387dfaa03036a8/ty-0.0.1a26-py3-none-win32.whl", hash = "sha256:86689b90024810cac7750bf0c6e1652e4b4175a9de7b82b8b1583202aeb47287", size = 8645669, upload-time = "2025-11-10T18:02:23.23Z" },
{ url = "https://files.pythonhosted.org/packages/b4/18/8a907575d2b335afee7556cb92233ebb5efcefe17752fc9dcab21cffb23b/ty-0.0.1a26-py3-none-win_amd64.whl", hash = "sha256:829e6e6dbd7d9d370f97b2398b4804552554bdcc2d298114fed5e2ea06cbc05c", size = 9442975, upload-time = "2025-11-10T18:02:25.68Z" },
{ url = "https://files.pythonhosted.org/packages/e9/22/af92dcfdd84b78dd97ac6b7154d6a763781f04a400140444885c297cc213/ty-0.0.1a26-py3-none-win_arm64.whl", hash = "sha256:b8f431c784d4cf5b4195a3521b2eca9c15902f239b91154cb920da33f943c62b", size = 8958958, upload-time = "2025-11-10T18:02:28.071Z" },
{ url = "https://files.pythonhosted.org/packages/08/4c/1e91d6b22dee1435db1cdf55e54ec601497dba650684517b1cd5b4345e80/ty-0.0.1a31-py3-none-linux_armv6l.whl", hash = "sha256:662b9a3a3497da12416789e21fda9eb4e1ac66c5233867d89953916099ee44f5", size = 9620261, upload-time = "2025-12-04T09:01:35.001Z" },
{ url = "https://files.pythonhosted.org/packages/94/8f/eb6ac56cc03a00d3258c3362c4fb5a58152d03e9fa207db0465e2dc717e2/ty-0.0.1a31-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:da05f73df587ff1362d487681370db47541123c005a0d1a60a5a048039e309cc", size = 9411370, upload-time = "2025-12-04T09:01:23.903Z" },
{ url = "https://files.pythonhosted.org/packages/d6/72/2cdbef5bd7ee7a58e71e67e845ae3f99dca695d0bca7561a3294fb8d723e/ty-0.0.1a31-py3-none-macosx_11_0_arm64.whl", hash = "sha256:74032bf207ce1eddc042f26aa9b6e0713373cf2c502174a53a41f9c469f02adb", size = 8925400, upload-time = "2025-12-04T09:01:59.074Z" },
{ url = "https://files.pythonhosted.org/packages/f4/4d/a10c3f2e8969e9e1efe3179d2c961236413c9765c9f95e84e8f515fb9b02/ty-0.0.1a31-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cd383fd54872df15816a7853a8c824400c85f850916bad2052564bad8462f4f2", size = 9201615, upload-time = "2025-12-04T09:01:21.085Z" },
{ url = "https://files.pythonhosted.org/packages/47/e5/bd26f0fc432459718b72a0bb41bd222fd1fad81c1d5f645a7eba94e14be6/ty-0.0.1a31-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c6defaf175bce7c91cea9a168a1c30bb523269ed174941cd31f8edc2d77f8ec7", size = 9401110, upload-time = "2025-12-04T09:01:32.241Z" },
{ url = "https://files.pythonhosted.org/packages/ca/63/cbb3419f74ce38c0a2affbc269d4d27ec032cfbc3b011a8db5815c89f540/ty-0.0.1a31-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2d41f7e68f05517177ef82d89bfe0bf8e787a6b72ad396c1e44a16ef353b95e2", size = 9779837, upload-time = "2025-12-04T09:01:37.55Z" },
{ url = "https://files.pythonhosted.org/packages/69/fb/1d99243a0e005fe8d53671d4a25d5ddcf345a12fb3c683726bd597e42f23/ty-0.0.1a31-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:22f2298a0de1a8af24f50e498023770b05ea4fc0ccebb2c53deb40ff73dc76fc", size = 10444412, upload-time = "2025-12-04T09:01:56.888Z" },
{ url = "https://files.pythonhosted.org/packages/65/7f/95242feb774356b7a93beb5278cd8c8bbb6a8b12d94977ff954929ed257e/ty-0.0.1a31-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:38669b9aa53bd87160a2ee8447a3bf8d91dd14b7462f8aa98f1d2740b609589a", size = 10171070, upload-time = "2025-12-04T09:01:44.917Z" },
{ url = "https://files.pythonhosted.org/packages/78/fa/4d8adeb9ff7fd32efcb9ebb05d5f61cd9ad4b4030390c76cd771fb38ac33/ty-0.0.1a31-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5fbc75e8e848929155b7ba0e9a222b2561222a1135bb492a9c5b9ad945c80b18", size = 10188190, upload-time = "2025-12-04T09:01:42.577Z" },
{ url = "https://files.pythonhosted.org/packages/f6/40/295903716cc2e4fdb88d8cf8b974f0936e6c021f35d5a7f78b769c746bcc/ty-0.0.1a31-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:915c0639dfb73f5f19cd69bbe89036ef18b8066ba88ce38d3e3cc0f39b32f99a", size = 9713419, upload-time = "2025-12-04T09:01:26.633Z" },
{ url = "https://files.pythonhosted.org/packages/f0/93/b622782ce78f0cbacf167c617b41f45e76de02e3d5d5898fc78ad7a47de7/ty-0.0.1a31-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:28d23f4e58e9b08fc8966ad8dac754b4cd5ccafed711e2a32a62f3d2cb6f44cb", size = 9170660, upload-time = "2025-12-04T09:02:01.556Z" },
{ url = "https://files.pythonhosted.org/packages/d9/5d/2a04dfd412c87d1da220260a5cf8444d36fa356d1f993ee1db5ad820df93/ty-0.0.1a31-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:cb2ebbc8065f4dd09e4b82f492becc55cad39068e842f82bfa1c9f7b9864b034", size = 9443773, upload-time = "2025-12-04T09:01:39.958Z" },
{ url = "https://files.pythonhosted.org/packages/1f/5e/18c0123b8dcd6a7e7f4a35d3eed127c6c0140377f5986df6bd01c9df5eb1/ty-0.0.1a31-py3-none-musllinux_1_2_i686.whl", hash = "sha256:966984a8a0e4f99d133e9b73bc778d4861b58467bdb85950805d67ff90e73e3e", size = 9532255, upload-time = "2025-12-04T09:01:29.243Z" },
{ url = "https://files.pythonhosted.org/packages/8a/c0/2570e4f891f33c3f9160f052d3759e9c7a3dee29bac5b93ad1f29ed42526/ty-0.0.1a31-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:bc4fe23fddaa78c0b11637a1eff152b95988960e5d240d282b41f827d13b28f0", size = 9837753, upload-time = "2025-12-04T09:02:04.361Z" },
{ url = "https://files.pythonhosted.org/packages/58/ad/6a231c11b95d3aa3f54824edfb7ab13ae13eea405bbcc6c80090551bd1b2/ty-0.0.1a31-py3-none-win32.whl", hash = "sha256:f82f4e051c40033ca9f10cffafc346fd86ea6541e786c2b1fcffa08c661efbaa", size = 9011568, upload-time = "2025-12-04T09:01:54.033Z" },
{ url = "https://files.pythonhosted.org/packages/f7/8b/c07438de84e3e9cbecedd2e8895dc25ca1b110847dd95b5e9f50124eb8d5/ty-0.0.1a31-py3-none-win_amd64.whl", hash = "sha256:12fae6138c4cbd143fe4b5c616056535353a2d0821062d8750132d3ea022fa8f", size = 9880831, upload-time = "2025-12-04T09:01:18.284Z" },
{ url = "https://files.pythonhosted.org/packages/a3/9c/ad589282e76e185eb54c3ce212182f7a28547ed20a5a08b51f9684dc2849/ty-0.0.1a31-py3-none-win_arm64.whl", hash = "sha256:4cc339de4dd4b8dd7167cfd1f826a25e303b3dec27da74596a0ce3ed83bcd293", size = 9380327, upload-time = "2025-12-04T09:01:49.651Z" },
]
[[package]]