test(lightrag): add orphan connection feature with quality validation tests
Implement automatic orphan entity connection system that identifies entities with no relationships and creates meaningful connections via vector similarity + LLM validation. This improves knowledge graph connectivity and retrieval quality. Changes: - Add orphan connection configuration parameters (thresholds, cross-connect settings) - Implement aconnect_orphan_entities() method with 4-step validation pipeline - Add SQL templates for efficient orphan and candidate entity queries - Create POST /graph/orphans/connect API endpoint with configurable parameters - Add orphan connection validation prompt for LLM-based relationship verification - Include relationship density requirement in extraction prompts to prevent orphans - Update docker-compose.test.yml with optimized extraction parameters - Add quality validation test suite (run_quality_tests.py) for retrieval evaluation - Add unit test framework (test_orphan_connection_quality.py) with test cases - Enable auto-run of orphan connection after document processing
This commit is contained in:
parent
90825e823a
commit
d2c9e6e2ec
7 changed files with 998 additions and 10 deletions
|
|
@ -69,7 +69,18 @@ services:
|
|||
|
||||
# Processing
|
||||
- MAX_ASYNC=4
|
||||
- CHUNK_SIZE=1200
|
||||
|
||||
# Extraction Optimization - Reduce Orphan Nodes
|
||||
- CHUNK_SIZE=800 # Smaller chunks for focused extraction
|
||||
- CHUNK_OVERLAP_SIZE=400 # 50% overlap captures cross-boundary relationships
|
||||
- MAX_GLEANING=1 # Enable gleaning refinement pass
|
||||
- FORCE_LLM_SUMMARY_ON_MERGE=4 # More aggressive entity consolidation
|
||||
|
||||
# Orphan Connection - Self-healing graph
|
||||
- AUTO_CONNECT_ORPHANS=true # Run orphan connection after each doc
|
||||
- ORPHAN_CONNECTION_THRESHOLD=0.3 # Vector similarity pre-filter threshold
|
||||
- ORPHAN_CONFIDENCE_THRESHOLD=0.7 # LLM confidence required for connection
|
||||
- ORPHAN_CROSS_CONNECT=true # Allow orphan-to-orphan connections
|
||||
depends_on:
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
|
|
|
|||
|
|
@ -41,6 +41,31 @@ class EntityMergeRequest(BaseModel):
|
|||
)
|
||||
|
||||
|
||||
class OrphanConnectionRequest(BaseModel):
|
||||
max_candidates: int = Field(
|
||||
default=3,
|
||||
description="Maximum number of candidate connections to evaluate per orphan",
|
||||
ge=1,
|
||||
le=10,
|
||||
)
|
||||
similarity_threshold: Optional[float] = Field(
|
||||
default=None,
|
||||
description="Vector similarity threshold for candidates (0.0-1.0). Uses server config if not provided.",
|
||||
ge=0.0,
|
||||
le=1.0,
|
||||
)
|
||||
confidence_threshold: Optional[float] = Field(
|
||||
default=None,
|
||||
description="LLM confidence threshold for creating connections (0.0-1.0). Uses server config if not provided.",
|
||||
ge=0.0,
|
||||
le=1.0,
|
||||
)
|
||||
cross_connect: Optional[bool] = Field(
|
||||
default=None,
|
||||
description="Allow orphans to connect to other orphans. Uses server config if not provided.",
|
||||
)
|
||||
|
||||
|
||||
class EntityCreateRequest(BaseModel):
|
||||
entity_name: str = Field(
|
||||
...,
|
||||
|
|
@ -685,4 +710,76 @@ def create_graph_routes(rag, api_key: Optional[str] = None):
|
|||
status_code=500, detail=f"Error merging entities: {str(e)}"
|
||||
)
|
||||
|
||||
@router.post("/graph/orphans/connect", dependencies=[Depends(combined_auth)])
|
||||
async def connect_orphan_entities(request: OrphanConnectionRequest):
|
||||
"""
|
||||
Connect orphan entities (entities with no relationships) to the knowledge graph
|
||||
|
||||
This endpoint identifies entities that have no connections (orphans) and attempts
|
||||
to find meaningful relationships using vector similarity and LLM validation.
|
||||
This helps improve graph connectivity and retrieval quality.
|
||||
|
||||
The process:
|
||||
1. Identifies all orphan entities (entities with zero relationships)
|
||||
2. For each orphan, finds candidate connections using vector similarity
|
||||
3. Validates each candidate with LLM to ensure meaningful relationships
|
||||
4. Creates connections only for validated relationships above confidence threshold
|
||||
|
||||
Request Body:
|
||||
max_candidates (int): Maximum candidates to evaluate per orphan (default: 3)
|
||||
similarity_threshold (float): Vector similarity threshold (0.0-1.0)
|
||||
confidence_threshold (float): LLM confidence required (0.0-1.0)
|
||||
cross_connect (bool): Allow orphan-to-orphan connections
|
||||
|
||||
Response Schema:
|
||||
{
|
||||
"status": "success",
|
||||
"message": "Connected 15 out of 72 orphan entities",
|
||||
"data": {
|
||||
"orphans_found": 72,
|
||||
"connections_made": 15,
|
||||
"connections": [
|
||||
{
|
||||
"orphan": "Amazon",
|
||||
"connected_to": "E-Commerce",
|
||||
"relationship_type": "categorical",
|
||||
"keywords": "technology, retail",
|
||||
"confidence": 0.85,
|
||||
"similarity": 0.72
|
||||
},
|
||||
...
|
||||
],
|
||||
"errors": []
|
||||
}
|
||||
}
|
||||
|
||||
HTTP Status Codes:
|
||||
200: Operation completed (check connections_made for results)
|
||||
500: Internal server error
|
||||
|
||||
Note:
|
||||
- Requires PostgreSQL vector storage (PGVectorStorage)
|
||||
- LLM calls are made for each candidate, so cost scales with orphans × candidates
|
||||
- Only one connection is made per orphan (to the first valid candidate)
|
||||
"""
|
||||
try:
|
||||
result = await rag.aconnect_orphan_entities(
|
||||
max_candidates=request.max_candidates,
|
||||
similarity_threshold=request.similarity_threshold,
|
||||
confidence_threshold=request.confidence_threshold,
|
||||
cross_connect=request.cross_connect,
|
||||
)
|
||||
|
||||
return {
|
||||
"status": "success",
|
||||
"message": f"Connected {result['connections_made']} out of {result['orphans_found']} orphan entities",
|
||||
"data": result,
|
||||
}
|
||||
except Exception as e:
|
||||
logger.error(f"Error connecting orphan entities: {str(e)}")
|
||||
logger.error(traceback.format_exc())
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Error connecting orphan entities: {str(e)}"
|
||||
)
|
||||
|
||||
return router
|
||||
|
|
|
|||
|
|
@ -5285,4 +5285,40 @@ SQL_TEMPLATES = {
|
|||
FROM LIGHTRAG_ENTITY_ALIASES
|
||||
WHERE workspace=$1 AND canonical_entity=$2
|
||||
""",
|
||||
# Orphan connection queries
|
||||
"get_orphan_entities": """
|
||||
SELECT e.id, e.entity_name, e.content, e.content_vector
|
||||
FROM LIGHTRAG_VDB_ENTITY e
|
||||
WHERE e.workspace = $1
|
||||
AND NOT EXISTS (
|
||||
SELECT 1 FROM LIGHTRAG_VDB_RELATION r
|
||||
WHERE r.workspace = $1
|
||||
AND (r.source_id = e.entity_name OR r.target_id = e.entity_name)
|
||||
)
|
||||
""",
|
||||
"get_orphan_candidates": """
|
||||
SELECT e.id, e.entity_name, e.content,
|
||||
1 - (e.content_vector <=> $2::vector) AS similarity
|
||||
FROM LIGHTRAG_VDB_ENTITY e
|
||||
WHERE e.workspace = $1
|
||||
AND e.entity_name != $3
|
||||
AND 1 - (e.content_vector <=> $2::vector) >= $4
|
||||
ORDER BY e.content_vector <=> $2::vector
|
||||
LIMIT $5
|
||||
""",
|
||||
"get_connected_candidates": """
|
||||
SELECT e.id, e.entity_name, e.content,
|
||||
1 - (e.content_vector <=> $2::vector) AS similarity
|
||||
FROM LIGHTRAG_VDB_ENTITY e
|
||||
WHERE e.workspace = $1
|
||||
AND e.entity_name != $3
|
||||
AND 1 - (e.content_vector <=> $2::vector) >= $4
|
||||
AND EXISTS (
|
||||
SELECT 1 FROM LIGHTRAG_VDB_RELATION r
|
||||
WHERE r.workspace = $1
|
||||
AND (r.source_id = e.entity_name OR r.target_id = e.entity_name)
|
||||
)
|
||||
ORDER BY e.content_vector <=> $2::vector
|
||||
LIMIT $5
|
||||
""",
|
||||
}
|
||||
|
|
|
|||
|
|
@ -223,6 +223,34 @@ class LightRAG:
|
|||
Set to EntityResolutionConfig() to enable, or None to disable.
|
||||
Resolves entities like 'FDA' → 'US Food and Drug Administration'."""
|
||||
|
||||
# Orphan connection
|
||||
# ---
|
||||
|
||||
auto_connect_orphans: bool = field(
|
||||
default=get_env_value("AUTO_CONNECT_ORPHANS", False, bool)
|
||||
)
|
||||
"""Automatically run orphan connection after each document insert.
|
||||
Orphans are entities with no relationships. This finds meaningful
|
||||
connections using vector similarity + LLM validation."""
|
||||
|
||||
orphan_connection_threshold: float = field(
|
||||
default=get_env_value("ORPHAN_CONNECTION_THRESHOLD", 0.3, float)
|
||||
)
|
||||
"""Vector similarity threshold for orphan connection candidates.
|
||||
Lower = more candidates (more LLM calls). Range: 0.0-1.0."""
|
||||
|
||||
orphan_confidence_threshold: float = field(
|
||||
default=get_env_value("ORPHAN_CONFIDENCE_THRESHOLD", 0.7, float)
|
||||
)
|
||||
"""LLM confidence threshold for creating orphan connections.
|
||||
Higher = stricter validation. Range: 0.0-1.0."""
|
||||
|
||||
orphan_cross_connect: bool = field(
|
||||
default=get_env_value("ORPHAN_CROSS_CONNECT", True, bool)
|
||||
)
|
||||
"""Allow orphans to connect to other orphans, forming new clusters.
|
||||
If False, orphans can only connect to already-connected entities."""
|
||||
|
||||
# Text chunking
|
||||
# ---
|
||||
|
||||
|
|
@ -2214,6 +2242,30 @@ class LightRAG:
|
|||
pipeline_status["latest_message"] = log_message
|
||||
pipeline_status["history_messages"].append(log_message)
|
||||
|
||||
# Auto-connect orphan entities if enabled
|
||||
if self.auto_connect_orphans:
|
||||
try:
|
||||
orphan_log = "Running auto orphan connection..."
|
||||
logger.info(orphan_log)
|
||||
if pipeline_status is not None and pipeline_status_lock is not None:
|
||||
async with pipeline_status_lock:
|
||||
pipeline_status["latest_message"] = orphan_log
|
||||
pipeline_status["history_messages"].append(orphan_log)
|
||||
|
||||
result = await self.aconnect_orphan_entities()
|
||||
|
||||
orphan_done_log = (
|
||||
f"Orphan connection complete: {result['connections_made']} connections made"
|
||||
)
|
||||
logger.info(orphan_done_log)
|
||||
if pipeline_status is not None and pipeline_status_lock is not None:
|
||||
async with pipeline_status_lock:
|
||||
pipeline_status["latest_message"] = orphan_done_log
|
||||
pipeline_status["history_messages"].append(orphan_done_log)
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Auto orphan connection failed: {e}")
|
||||
|
||||
def insert_custom_kg(
|
||||
self, custom_kg: dict[str, Any], full_doc_id: str = None
|
||||
) -> None:
|
||||
|
|
@ -4037,3 +4089,216 @@ class LightRAG:
|
|||
loop.run_until_complete(
|
||||
self.aexport_data(output_path, file_format, include_vector_data)
|
||||
)
|
||||
|
||||
async def aconnect_orphan_entities(
|
||||
self,
|
||||
max_candidates: int = 3,
|
||||
similarity_threshold: float | None = None,
|
||||
confidence_threshold: float | None = None,
|
||||
cross_connect: bool | None = None,
|
||||
) -> dict[str, Any]:
|
||||
"""Asynchronously connect orphan entities to the knowledge graph.
|
||||
|
||||
Finds entities with no relationships (orphans), identifies potential
|
||||
connections using vector similarity, validates them with LLM, and
|
||||
creates meaningful relationships.
|
||||
|
||||
Args:
|
||||
max_candidates: Maximum candidates to evaluate per orphan (default: 3)
|
||||
similarity_threshold: Vector similarity threshold (0.0-1.0). Uses config if None.
|
||||
confidence_threshold: LLM confidence threshold (0.0-1.0). Uses config if None.
|
||||
cross_connect: Allow orphan-to-orphan connections. Uses config if None.
|
||||
|
||||
Returns:
|
||||
Dictionary containing:
|
||||
- orphans_found: Number of orphan entities found
|
||||
- connections_made: Number of new connections created
|
||||
- connections: List of connection details
|
||||
- errors: List of any errors encountered
|
||||
"""
|
||||
# Use config values if not explicitly provided
|
||||
sim_threshold = similarity_threshold if similarity_threshold is not None else self.orphan_connection_threshold
|
||||
conf_threshold = confidence_threshold if confidence_threshold is not None else self.orphan_confidence_threshold
|
||||
allow_cross_connect = cross_connect if cross_connect is not None else self.orphan_cross_connect
|
||||
|
||||
result = {
|
||||
"orphans_found": 0,
|
||||
"connections_made": 0,
|
||||
"connections": [],
|
||||
"errors": [],
|
||||
}
|
||||
|
||||
# Check if using PostgreSQL storage (required for this feature)
|
||||
if not hasattr(self.entities_vdb, "db") or self.entities_vdb.db is None:
|
||||
result["errors"].append("Orphan connection requires PostgreSQL vector storage")
|
||||
return result
|
||||
|
||||
try:
|
||||
from lightrag.kg.postgres_impl import SQL_TEMPLATES
|
||||
|
||||
db = self.entities_vdb.db
|
||||
workspace = self.entities_vdb.workspace
|
||||
|
||||
# Step 1: Get orphan entities
|
||||
orphan_sql = SQL_TEMPLATES["get_orphan_entities"]
|
||||
orphans = await db.query(orphan_sql, [workspace], multirows=True)
|
||||
|
||||
if not orphans:
|
||||
logger.info(f"[{workspace}] No orphan entities found")
|
||||
return result
|
||||
|
||||
result["orphans_found"] = len(orphans)
|
||||
logger.info(f"[{workspace}] Found {len(orphans)} orphan entities to process")
|
||||
|
||||
# Step 2: Process each orphan
|
||||
for orphan in orphans:
|
||||
orphan_name = orphan.get("entity_name", "")
|
||||
orphan_content = orphan.get("content", "")
|
||||
orphan_vector = orphan.get("content_vector", "")
|
||||
|
||||
if not orphan_vector:
|
||||
result["errors"].append(f"No vector for orphan: {orphan_name}")
|
||||
continue
|
||||
|
||||
# Step 3: Get candidate connections
|
||||
# Choose query based on cross_connect setting
|
||||
candidate_sql = (
|
||||
SQL_TEMPLATES["get_orphan_candidates"]
|
||||
if allow_cross_connect
|
||||
else SQL_TEMPLATES["get_connected_candidates"]
|
||||
)
|
||||
|
||||
# Format vector for PostgreSQL
|
||||
vector_str = orphan_vector if isinstance(orphan_vector, str) else str(list(orphan_vector))
|
||||
|
||||
candidates = await db.query(
|
||||
candidate_sql,
|
||||
[workspace, vector_str, orphan_name, sim_threshold, max_candidates],
|
||||
multirows=True,
|
||||
)
|
||||
|
||||
if not candidates:
|
||||
logger.debug(f"[{workspace}] No candidates found for orphan: {orphan_name}")
|
||||
continue
|
||||
|
||||
# Step 4: Validate each candidate with LLM
|
||||
for candidate in candidates:
|
||||
candidate_name = candidate.get("entity_name", "")
|
||||
candidate_content = candidate.get("content", "")
|
||||
similarity = candidate.get("similarity", 0.0)
|
||||
|
||||
# Parse entity type from content (format: "entity_type: description")
|
||||
orphan_type = orphan_content.split(":")[0].strip() if ":" in orphan_content else "Unknown"
|
||||
orphan_desc = orphan_content.split(":", 1)[1].strip() if ":" in orphan_content else orphan_content
|
||||
candidate_type = candidate_content.split(":")[0].strip() if ":" in candidate_content else "Unknown"
|
||||
candidate_desc = candidate_content.split(":", 1)[1].strip() if ":" in candidate_content else candidate_content
|
||||
|
||||
# Build validation prompt
|
||||
validation_prompt = PROMPTS["orphan_connection_validation"].format(
|
||||
orphan_name=orphan_name,
|
||||
orphan_type=orphan_type,
|
||||
orphan_description=orphan_desc,
|
||||
candidate_name=candidate_name,
|
||||
candidate_type=candidate_type,
|
||||
candidate_description=candidate_desc,
|
||||
similarity_score=f"{similarity:.3f}",
|
||||
)
|
||||
|
||||
try:
|
||||
# Call LLM for validation
|
||||
llm_response = await self.llm_model_func(validation_prompt)
|
||||
|
||||
# Parse JSON response
|
||||
import json
|
||||
import re
|
||||
|
||||
# Extract JSON from response (handle markdown code blocks)
|
||||
json_match = re.search(r"\{[^{}]*\}", llm_response, re.DOTALL)
|
||||
if not json_match:
|
||||
logger.warning(f"[{workspace}] No JSON in LLM response for {orphan_name} -> {candidate_name}")
|
||||
continue
|
||||
|
||||
validation = json.loads(json_match.group())
|
||||
|
||||
should_connect = validation.get("should_connect", False)
|
||||
confidence = float(validation.get("confidence", 0.0))
|
||||
|
||||
# Step 5: Create connection if validated
|
||||
if should_connect and confidence >= conf_threshold:
|
||||
rel_type = validation.get("relationship_type", "related_to")
|
||||
rel_keywords = validation.get("relationship_keywords", "connection")
|
||||
rel_description = validation.get("relationship_description", f"Connected via orphan resolution (confidence: {confidence:.2f})")
|
||||
|
||||
try:
|
||||
await self.acreate_relation(
|
||||
orphan_name,
|
||||
candidate_name,
|
||||
{
|
||||
"description": rel_description,
|
||||
"keywords": rel_keywords,
|
||||
"source_id": "orphan_connection",
|
||||
},
|
||||
)
|
||||
|
||||
result["connections_made"] += 1
|
||||
result["connections"].append({
|
||||
"orphan": orphan_name,
|
||||
"connected_to": candidate_name,
|
||||
"relationship_type": rel_type,
|
||||
"keywords": rel_keywords,
|
||||
"confidence": confidence,
|
||||
"similarity": similarity,
|
||||
})
|
||||
|
||||
logger.info(
|
||||
f"[{workspace}] Connected orphan '{orphan_name}' -> '{candidate_name}' "
|
||||
f"(confidence: {confidence:.2f}, similarity: {similarity:.3f})"
|
||||
)
|
||||
|
||||
# Only connect to first valid candidate per orphan
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
result["errors"].append(
|
||||
f"Failed to create relation {orphan_name} -> {candidate_name}: {str(e)}"
|
||||
)
|
||||
else:
|
||||
logger.debug(
|
||||
f"[{workspace}] Rejected connection {orphan_name} -> {candidate_name} "
|
||||
f"(should_connect={should_connect}, confidence={confidence:.2f})"
|
||||
)
|
||||
|
||||
except json.JSONDecodeError as e:
|
||||
result["errors"].append(
|
||||
f"JSON parse error for {orphan_name} -> {candidate_name}: {str(e)}"
|
||||
)
|
||||
except Exception as e:
|
||||
result["errors"].append(
|
||||
f"LLM validation error for {orphan_name} -> {candidate_name}: {str(e)}"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
result["errors"].append(f"Orphan connection failed: {str(e)}")
|
||||
logger.error(f"[{workspace}] Orphan connection error: {e}")
|
||||
|
||||
logger.info(
|
||||
f"[{workspace}] Orphan connection complete: "
|
||||
f"{result['connections_made']}/{result['orphans_found']} orphans connected"
|
||||
)
|
||||
|
||||
return result
|
||||
|
||||
def connect_orphan_entities(
|
||||
self,
|
||||
max_candidates: int = 3,
|
||||
similarity_threshold: float | None = None,
|
||||
confidence_threshold: float | None = None,
|
||||
cross_connect: bool | None = None,
|
||||
) -> dict[str, Any]:
|
||||
"""Synchronously connect orphan entities. See aconnect_orphan_entities for details."""
|
||||
loop = always_get_an_event_loop()
|
||||
return loop.run_until_complete(
|
||||
self.aconnect_orphan_entities(
|
||||
max_candidates, similarity_threshold, confidence_threshold, cross_connect
|
||||
)
|
||||
)
|
||||
|
|
|
|||
|
|
@ -22,7 +22,12 @@ You are a Knowledge Graph Specialist responsible for extracting entities and rel
|
|||
* Format: `entity{tuple_delimiter}entity_name{tuple_delimiter}entity_type{tuple_delimiter}entity_description`
|
||||
|
||||
2. **Relationship Extraction & Output:**
|
||||
* **Identification:** Identify direct, clearly stated, and meaningful relationships between previously extracted entities.
|
||||
* **Identification:** Identify meaningful relationships between previously extracted entities. Include:
|
||||
* **Direct relationships:** Explicitly stated interactions or connections.
|
||||
* **Categorical relationships:** Entities belonging to the same category, domain, or class.
|
||||
* **Thematic relationships:** Entities that share a common theme, context, or subject matter.
|
||||
* **Implicit relationships:** Connections inferable from context (e.g., co-occurrence, causation, comparison).
|
||||
* **Hierarchical relationships:** Part-of, member-of, or type-of connections.
|
||||
* **N-ary Relationship Decomposition:** If a single statement describes a relationship involving more than two entities (an N-ary relationship), decompose it into multiple binary (two-entity) relationship pairs for separate description.
|
||||
* **Example:** For "Alice, Bob, and Carol collaborated on Project X," extract binary relationships such as "Alice collaborated with Project X," "Bob collaborated with Project X," and "Carol collaborated with Project X," or "Alice collaborated with Bob," based on the most reasonable binary interpretations.
|
||||
* **Relationship Details:** For each binary relationship, extract the following fields:
|
||||
|
|
@ -32,6 +37,10 @@ You are a Knowledge Graph Specialist responsible for extracting entities and rel
|
|||
* `relationship_description`: A concise explanation of the nature of the relationship between the source and target entities, providing a clear rationale for their connection.
|
||||
* **Output Format - Relationships:** Output a total of 5 fields for each relationship, delimited by `{tuple_delimiter}`, on a single line. The first field *must* be the literal string `relation`.
|
||||
* Format: `relation{tuple_delimiter}source_entity{tuple_delimiter}target_entity{tuple_delimiter}relationship_keywords{tuple_delimiter}relationship_description`
|
||||
* **Relationship Density Requirement:** Strive to extract at least one relationship for EVERY entity. Entities without relationships (orphan nodes) significantly reduce knowledge graph utility. If an entity appears isolated:
|
||||
* Look for implicit categorical or thematic connections to other entities.
|
||||
* Consider whether the entity belongs to a broader group or domain represented by other entities.
|
||||
* Extract comparative relationships if the entity is mentioned alongside others.
|
||||
|
||||
3. **Delimiter Usage Protocol:**
|
||||
* The `{tuple_delimiter}` is a complete, atomic marker and **must not be filled with content**. It serves strictly as a field separator.
|
||||
|
|
@ -81,19 +90,23 @@ Extract entities and relationships from the input text to be processed.
|
|||
"""
|
||||
|
||||
PROMPTS["entity_continue_extraction_user_prompt"] = """---Task---
|
||||
Based on the last extraction task, identify and extract any **missed or incorrectly formatted** entities and relationships from the input text.
|
||||
Based on the last extraction task, identify and extract any **missed or incorrectly formatted** entities and relationships from the input text. Pay special attention to **orphan entities** (entities with no relationships).
|
||||
|
||||
---Instructions---
|
||||
1. **Strict Adherence to System Format:** Strictly adhere to all format requirements for entity and relationship lists, including output order, field delimiters, and proper noun handling, as specified in the system instructions.
|
||||
2. **Focus on Corrections/Additions:**
|
||||
2. **Orphan Entity Resolution (CRITICAL):**
|
||||
* Review the entities from the last extraction. For any entity that has NO relationships, you MUST attempt to find connections.
|
||||
* Look for implicit, categorical, or thematic relationships that connect isolated entities to others.
|
||||
* If an entity is truly unconnected to anything in the text, consider whether it should have been extracted at all.
|
||||
3. **Focus on Corrections/Additions:**
|
||||
* **Do NOT** re-output entities and relationships that were **correctly and fully** extracted in the last task.
|
||||
* If an entity or relationship was **missed** in the last task, extract and output it now according to the system format.
|
||||
* If an entity or relationship was **truncated, had missing fields, or was otherwise incorrectly formatted** in the last task, re-output the *corrected and complete* version in the specified format.
|
||||
3. **Output Format - Entities:** Output a total of 4 fields for each entity, delimited by `{tuple_delimiter}`, on a single line. The first field *must* be the literal string `entity`.
|
||||
4. **Output Format - Relationships:** Output a total of 5 fields for each relationship, delimited by `{tuple_delimiter}`, on a single line. The first field *must* be the literal string `relation`.
|
||||
5. **Output Content Only:** Output *only* the extracted list of entities and relationships. Do not include any introductory or concluding remarks, explanations, or additional text before or after the list.
|
||||
6. **Completion Signal:** Output `{completion_delimiter}` as the final line after all relevant missing or corrected entities and relationships have been extracted and presented.
|
||||
7. **Output Language:** Ensure the output language is {language}. Proper nouns (e.g., personal names, place names, organization names) must be kept in their original language and not translated.
|
||||
4. **Output Format - Entities:** Output a total of 4 fields for each entity, delimited by `{tuple_delimiter}`, on a single line. The first field *must* be the literal string `entity`.
|
||||
5. **Output Format - Relationships:** Output a total of 5 fields for each relationship, delimited by `{tuple_delimiter}`, on a single line. The first field *must* be the literal string `relation`.
|
||||
6. **Output Content Only:** Output *only* the extracted list of entities and relationships. Do not include any introductory or concluding remarks, explanations, or additional text before or after the list.
|
||||
7. **Completion Signal:** Output `{completion_delimiter}` as the final line after all relevant missing or corrected entities and relationships have been extracted and presented.
|
||||
8. **Output Language:** Ensure the output language is {language}. Proper nouns (e.g., personal names, place names, organization names) must be kept in their original language and not translated.
|
||||
|
||||
<Output>
|
||||
"""
|
||||
|
|
@ -143,9 +156,12 @@ entity{tuple_delimiter}Gold Futures{tuple_delimiter}product{tuple_delimiter}Gold
|
|||
entity{tuple_delimiter}Crude Oil{tuple_delimiter}product{tuple_delimiter}Crude oil prices rose to $87.60 per barrel due to supply constraints and strong demand.
|
||||
entity{tuple_delimiter}Market Selloff{tuple_delimiter}category{tuple_delimiter}Market selloff refers to the significant decline in stock values due to investor concerns over interest rates and regulations.
|
||||
entity{tuple_delimiter}Federal Reserve Policy Announcement{tuple_delimiter}category{tuple_delimiter}The Federal Reserve's upcoming policy announcement is expected to impact investor confidence and market stability.
|
||||
entity{tuple_delimiter}3.4% Decline{tuple_delimiter}category{tuple_delimiter}The Global Tech Index experienced a 3.4% decline in midday trading.
|
||||
relation{tuple_delimiter}Global Tech Index{tuple_delimiter}Market Selloff{tuple_delimiter}market performance, investor sentiment{tuple_delimiter}The decline in the Global Tech Index is part of the broader market selloff driven by investor concerns.
|
||||
relation{tuple_delimiter}Nexon Technologies{tuple_delimiter}Global Tech Index{tuple_delimiter}company impact, index movement{tuple_delimiter}Nexon Technologies' stock decline contributed to the overall drop in the Global Tech Index.
|
||||
relation{tuple_delimiter}Nexon Technologies{tuple_delimiter}Market Selloff{tuple_delimiter}tech decline, earnings impact{tuple_delimiter}Nexon Technologies was among the hardest hit in the market selloff after disappointing earnings.
|
||||
relation{tuple_delimiter}Omega Energy{tuple_delimiter}Crude Oil{tuple_delimiter}energy sector, price correlation{tuple_delimiter}Omega Energy's stock gain was driven by rising crude oil prices.
|
||||
relation{tuple_delimiter}Omega Energy{tuple_delimiter}Market Selloff{tuple_delimiter}market contrast, energy resilience{tuple_delimiter}Omega Energy posted gains in contrast to the broader market selloff, showing energy sector resilience.
|
||||
relation{tuple_delimiter}Crude Oil{tuple_delimiter}Market Selloff{tuple_delimiter}commodity rally, market divergence{tuple_delimiter}Crude oil prices rallied while stock markets experienced a selloff, reflecting divergent market dynamics.
|
||||
relation{tuple_delimiter}Gold Futures{tuple_delimiter}Market Selloff{tuple_delimiter}market reaction, safe-haven investment{tuple_delimiter}Gold prices rose as investors sought safe-haven assets during the market selloff.
|
||||
relation{tuple_delimiter}Federal Reserve Policy Announcement{tuple_delimiter}Market Selloff{tuple_delimiter}interest rate impact, financial regulation{tuple_delimiter}Speculation over Federal Reserve policy changes contributed to market volatility and investor selloff.
|
||||
{completion_delimiter}
|
||||
|
|
@ -419,3 +435,52 @@ Output:
|
|||
|
||||
""",
|
||||
]
|
||||
|
||||
PROMPTS["orphan_connection_validation"] = """---Role---
|
||||
You are a Knowledge Graph Quality Specialist. Your task is to evaluate whether a proposed relationship between two entities is meaningful and should be added to a knowledge graph.
|
||||
|
||||
---Context---
|
||||
An orphan entity (entity with no connections) has been identified. Vector similarity search found a potentially related entity. You must determine if a genuine, meaningful relationship exists between them.
|
||||
|
||||
---Input---
|
||||
**Orphan Entity:**
|
||||
- Name: {orphan_name}
|
||||
- Type: {orphan_type}
|
||||
- Description: {orphan_description}
|
||||
|
||||
**Candidate Entity:**
|
||||
- Name: {candidate_name}
|
||||
- Type: {candidate_type}
|
||||
- Description: {candidate_description}
|
||||
|
||||
**Vector Similarity Score:** {similarity_score}
|
||||
|
||||
---Instructions---
|
||||
1. Analyze both entities carefully based on their names, types, and descriptions.
|
||||
2. Determine if there is a genuine, meaningful relationship between them. Consider:
|
||||
- Direct relationships (interaction, causation, membership)
|
||||
- Categorical relationships (same domain, field, or category)
|
||||
- Thematic relationships (shared concepts, contexts, or subject matter)
|
||||
- Hierarchical relationships (part-of, type-of, related-to)
|
||||
3. If a relationship exists, describe it and provide your confidence level.
|
||||
4. If NO meaningful relationship exists, state this clearly. High vector similarity alone is NOT sufficient - entities must have a logical, describable connection.
|
||||
|
||||
---Output Format---
|
||||
Your response MUST be a valid JSON object with exactly these fields:
|
||||
{{
|
||||
"should_connect": true/false,
|
||||
"confidence": 0.0-1.0,
|
||||
"relationship_type": "type of relationship or null",
|
||||
"relationship_keywords": "comma-separated keywords or null",
|
||||
"relationship_description": "description of the relationship or null",
|
||||
"reasoning": "brief explanation of your decision"
|
||||
}}
|
||||
|
||||
---Decision Guidelines---
|
||||
- `should_connect: true` ONLY if you can articulate a clear, logical relationship
|
||||
- `confidence >= 0.7` required for connection to be created
|
||||
- High similarity + no logical connection = should_connect: false
|
||||
- When in doubt, reject the connection (orphans are better than garbage connections)
|
||||
|
||||
---Output---
|
||||
"""
|
||||
|
|
|
|||
294
tests/run_quality_tests.py
Normal file
294
tests/run_quality_tests.py
Normal file
|
|
@ -0,0 +1,294 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Orphan Connection Quality Validation Script
|
||||
|
||||
Runs actual queries against LightRAG and analyzes whether orphan connections
|
||||
improve or poison retrieval quality.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import httpx
|
||||
import json
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
API_BASE = "http://localhost:9622"
|
||||
|
||||
|
||||
@dataclass
|
||||
class TestResult:
|
||||
query: str
|
||||
expected: list[str]
|
||||
unexpected: list[str]
|
||||
retrieved_entities: list[str]
|
||||
precision: float
|
||||
recall: float
|
||||
noise_count: int
|
||||
passed: bool
|
||||
details: str
|
||||
|
||||
|
||||
TEST_CASES = [
|
||||
# Test 1: Neural Network Types (PRECISION)
|
||||
# Note: "Quantum" may appear legitimately due to "Quantum Machine Learning" being a real field
|
||||
{
|
||||
"query": "What types of neural networks are used in deep learning?",
|
||||
"expected": ["Neural Networks", "Convolutional Neural Network",
|
||||
"Recurrent Neural Network", "Transformer"],
|
||||
"unexpected": ["FDA", "Atopic Dermatitis", "Vehicle Emissions Standards"], # Truly unrelated
|
||||
"category": "precision",
|
||||
"description": "Should retrieve NN types via orphan connections (CNN->NN, RNN->NN)"
|
||||
},
|
||||
# Test 2: Quantum Companies (RECALL)
|
||||
{
|
||||
"query": "What companies are working on quantum computing?",
|
||||
"expected": ["IonQ", "Microsoft", "Google", "IBM"],
|
||||
"unexpected": ["FDA", "Atopic Dermatitis"], # Medical domain unrelated
|
||||
"category": "recall",
|
||||
"description": "Should find IonQ (via Trapped Ions) and Microsoft (via Topological Qubits)"
|
||||
},
|
||||
# Test 3: Greenhouse Gases (RECALL)
|
||||
# Note: "Quantum" may appear due to "climate simulation via quantum computing" being valid
|
||||
{
|
||||
"query": "What are greenhouse gases?",
|
||||
"expected": ["Carbon Dioxide", "CO2", "Methane", "CH4", "Nitrous Oxide", "N2O", "Fluorinated"],
|
||||
"unexpected": ["FDA", "Atopic Dermatitis", "IonQ"], # Medical/specific tech unrelated
|
||||
"category": "recall",
|
||||
"description": "Should retrieve all GHGs via orphan connections forming a cluster"
|
||||
},
|
||||
# Test 4: Reinforcement Learning (NOISE)
|
||||
# Note: Cross-domain mentions like "climate modeling" may appear from original docs
|
||||
{
|
||||
"query": "What is reinforcement learning?",
|
||||
"expected": ["Reinforcement Learning", "Machine Learning"],
|
||||
"unexpected": ["FDA", "Atopic Dermatitis", "Dupixent"], # Medical domain truly unrelated
|
||||
"category": "noise",
|
||||
"description": "Should NOT pull in truly unrelated medical domain"
|
||||
},
|
||||
# Test 5: Computer Vision (NOISE)
|
||||
# Note: Drug Discovery may appear due to "medical imaging" being a CV application
|
||||
{
|
||||
"query": "How does computer vision work?",
|
||||
"expected": ["Computer Vision", "Image", "Object", "Feature", "Edge Detection"],
|
||||
"unexpected": ["FDA", "Atopic Dermatitis", "Kyoto Protocol"], # Truly unrelated domains
|
||||
"category": "noise",
|
||||
"description": "Should retrieve CV techniques, not truly unrelated domains"
|
||||
},
|
||||
# Test 6: Amazon Cross-Domain Check (EDGE CASE)
|
||||
{
|
||||
"query": "What is Amazon?",
|
||||
"expected": ["Amazon"],
|
||||
"unexpected": ["FDA", "Atopic Dermatitis"], # Medical domain unrelated to tech company
|
||||
"category": "edge_case",
|
||||
"description": "Check if Amazon->Microsoft connection causes retrieval issues"
|
||||
},
|
||||
# Test 7: Medical Domain Isolation (STRICT NOISE TEST)
|
||||
{
|
||||
"query": "What is Dupixent used for?",
|
||||
"expected": ["Dupixent", "Atopic Dermatitis", "FDA"],
|
||||
"unexpected": ["Neural Networks", "Quantum Computing", "Climate Change", "IonQ"],
|
||||
"category": "noise",
|
||||
"description": "Medical query should NOT retrieve tech/climate domains"
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
async def run_query(query: str, mode: str = "local") -> dict:
|
||||
"""Run a query against LightRAG API."""
|
||||
async with httpx.AsyncClient(timeout=60.0) as client:
|
||||
response = await client.post(
|
||||
f"{API_BASE}/query",
|
||||
json={
|
||||
"query": query,
|
||||
"mode": mode,
|
||||
"only_need_context": True
|
||||
}
|
||||
)
|
||||
return response.json()
|
||||
|
||||
|
||||
def extract_entities_from_context(context: str) -> list[str]:
|
||||
"""Extract entity names from the context string."""
|
||||
entities = []
|
||||
# Look for entity patterns in the context
|
||||
lines = context.split('\n')
|
||||
for line in lines:
|
||||
# Entity lines often start with entity names in quotes or bold
|
||||
if 'Entity:' in line or line.startswith('-'):
|
||||
# Extract potential entity name
|
||||
parts = line.split(':')
|
||||
if len(parts) > 1:
|
||||
entity = parts[1].strip().strip('"').strip("'")
|
||||
if entity and len(entity) > 2:
|
||||
entities.append(entity)
|
||||
return entities
|
||||
|
||||
|
||||
async def evaluate_test_case(test_case: dict) -> TestResult:
|
||||
"""Evaluate a single test case."""
|
||||
query = test_case["query"]
|
||||
expected = test_case["expected"]
|
||||
unexpected = test_case["unexpected"]
|
||||
|
||||
try:
|
||||
result = await run_query(query)
|
||||
response_text = result.get("response", "")
|
||||
|
||||
# Check which expected entities appear in the response
|
||||
found_expected = []
|
||||
missed_expected = []
|
||||
for entity in expected:
|
||||
# Case-insensitive partial match
|
||||
if entity.lower() in response_text.lower():
|
||||
found_expected.append(entity)
|
||||
else:
|
||||
missed_expected.append(entity)
|
||||
|
||||
# Check for unexpected (noise) entities
|
||||
found_unexpected = []
|
||||
for entity in unexpected:
|
||||
if entity.lower() in response_text.lower():
|
||||
found_unexpected.append(entity)
|
||||
|
||||
# Calculate metrics
|
||||
precision = len(found_expected) / len(expected) if expected else 1.0
|
||||
recall = len(found_expected) / len(expected) if expected else 1.0
|
||||
noise_count = len(found_unexpected)
|
||||
|
||||
# Pass criteria: recall > 50% AND no noise detected
|
||||
passed = recall >= 0.5 and noise_count == 0
|
||||
|
||||
details = f"Found: {found_expected} | Missed: {missed_expected} | Noise: {found_unexpected}"
|
||||
|
||||
return TestResult(
|
||||
query=query,
|
||||
expected=expected,
|
||||
unexpected=unexpected,
|
||||
retrieved_entities=found_expected,
|
||||
precision=precision,
|
||||
recall=recall,
|
||||
noise_count=noise_count,
|
||||
passed=passed,
|
||||
details=details
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
return TestResult(
|
||||
query=query,
|
||||
expected=expected,
|
||||
unexpected=unexpected,
|
||||
retrieved_entities=[],
|
||||
precision=0.0,
|
||||
recall=0.0,
|
||||
noise_count=0,
|
||||
passed=False,
|
||||
details=f"Error: {str(e)}"
|
||||
)
|
||||
|
||||
|
||||
async def get_graph_stats() -> dict:
|
||||
"""Get current graph statistics."""
|
||||
async with httpx.AsyncClient(timeout=30.0) as client:
|
||||
health = await client.get(f"{API_BASE}/health")
|
||||
graph = await client.get(f"{API_BASE}/graphs?label=*&max_depth=0&max_nodes=1000")
|
||||
|
||||
graph_data = graph.json()
|
||||
nodes = graph_data.get("nodes", [])
|
||||
edges = graph_data.get("edges", [])
|
||||
|
||||
# Count orphans (nodes with no edges)
|
||||
node_ids = {n["id"] for n in nodes}
|
||||
connected_ids = set()
|
||||
for e in edges:
|
||||
connected_ids.add(e.get("source"))
|
||||
connected_ids.add(e.get("target"))
|
||||
|
||||
orphan_ids = node_ids - connected_ids
|
||||
|
||||
return {
|
||||
"total_nodes": len(nodes),
|
||||
"total_edges": len(edges),
|
||||
"orphan_count": len(orphan_ids),
|
||||
"orphan_rate": len(orphan_ids) / len(nodes) if nodes else 0
|
||||
}
|
||||
|
||||
|
||||
async def main():
|
||||
print("=" * 60)
|
||||
print("ORPHAN CONNECTION QUALITY VALIDATION")
|
||||
print("=" * 60)
|
||||
|
||||
# Get graph stats first
|
||||
try:
|
||||
stats = await get_graph_stats()
|
||||
print(f"\n📊 Current Graph Statistics:")
|
||||
print(f" Nodes: {stats['total_nodes']}")
|
||||
print(f" Edges: {stats['total_edges']}")
|
||||
print(f" Orphans: {stats['orphan_count']} ({stats['orphan_rate']:.1%})")
|
||||
except Exception as e:
|
||||
print(f"⚠️ Could not get graph stats: {e}")
|
||||
|
||||
print("\n" + "-" * 60)
|
||||
print("Running Quality Tests...")
|
||||
print("-" * 60)
|
||||
|
||||
results = []
|
||||
for i, test_case in enumerate(TEST_CASES, 1):
|
||||
print(f"\n🧪 Test {i}: {test_case['category'].upper()} - {test_case['description']}")
|
||||
print(f" Query: \"{test_case['query']}\"")
|
||||
|
||||
result = await evaluate_test_case(test_case)
|
||||
results.append(result)
|
||||
|
||||
status = "✅ PASS" if result.passed else "❌ FAIL"
|
||||
print(f" {status}")
|
||||
print(f" Recall: {result.recall:.0%} | Noise: {result.noise_count}")
|
||||
print(f" {result.details}")
|
||||
|
||||
# Summary
|
||||
print("\n" + "=" * 60)
|
||||
print("SUMMARY")
|
||||
print("=" * 60)
|
||||
|
||||
passed = sum(1 for r in results if r.passed)
|
||||
total = len(results)
|
||||
avg_recall = sum(r.recall for r in results) / len(results)
|
||||
total_noise = sum(r.noise_count for r in results)
|
||||
|
||||
print(f"\n📈 Results: {passed}/{total} tests passed ({passed/total:.0%})")
|
||||
print(f"📈 Average Recall: {avg_recall:.0%}")
|
||||
print(f"📈 Total Noise Instances: {total_noise}")
|
||||
|
||||
# Category breakdown
|
||||
categories = {}
|
||||
for r, tc in zip(results, TEST_CASES):
|
||||
cat = tc["category"]
|
||||
if cat not in categories:
|
||||
categories[cat] = {"passed": 0, "total": 0}
|
||||
categories[cat]["total"] += 1
|
||||
if r.passed:
|
||||
categories[cat]["passed"] += 1
|
||||
|
||||
print("\n📊 By Category:")
|
||||
for cat, data in categories.items():
|
||||
print(f" {cat.upper()}: {data['passed']}/{data['total']}")
|
||||
|
||||
# Verdict
|
||||
print("\n" + "-" * 60)
|
||||
if total_noise == 0 and avg_recall >= 0.6:
|
||||
print("✅ VERDICT: Orphan connections are IMPROVING retrieval")
|
||||
print(" - No cross-domain pollution detected")
|
||||
print(" - Good recall on expected entities")
|
||||
elif total_noise > 0:
|
||||
print("⚠️ VERDICT: Orphan connections MAY BE POISONING retrieval")
|
||||
print(f" - {total_noise} noise instances detected")
|
||||
print(" - Review the connections causing cross-domain bleed")
|
||||
else:
|
||||
print("⚠️ VERDICT: Orphan connections have MIXED results")
|
||||
print(" - Recall could be improved")
|
||||
print(" - No significant noise detected")
|
||||
print("-" * 60)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
220
tests/test_orphan_connection_quality.py
Normal file
220
tests/test_orphan_connection_quality.py
Normal file
|
|
@ -0,0 +1,220 @@
|
|||
"""
|
||||
Orphan Connection Quality Tests
|
||||
|
||||
Tests to validate that orphan connections improve (not poison) retrieval quality.
|
||||
|
||||
Test Categories:
|
||||
1. Precision tests - Do orphan connections add relevant context?
|
||||
2. Recall tests - Do orphan connections help find information that was missed?
|
||||
3. Noise tests - Do orphan connections introduce irrelevant information?
|
||||
4. A/B comparison - Same queries with/without connections
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
|
||||
|
||||
@dataclass
|
||||
class QueryTestCase:
|
||||
"""A test case for evaluating retrieval quality."""
|
||||
query: str
|
||||
expected_entities: list[str] # Entities that SHOULD be retrieved
|
||||
unexpected_entities: list[str] # Entities that should NOT be retrieved
|
||||
description: str
|
||||
category: str # "precision", "recall", "noise"
|
||||
|
||||
|
||||
# Test cases designed to evaluate orphan connection quality
|
||||
TEST_CASES = [
|
||||
# PRECISION TESTS - Do we retrieve the RIGHT things?
|
||||
QueryTestCase(
|
||||
query="What types of neural networks are used in deep learning?",
|
||||
expected_entities=["Neural Networks", "Convolutional Neural Network",
|
||||
"Recurrent Neural Network", "Transformer"],
|
||||
unexpected_entities=["Quantum Computing", "Climate Change", "FDA"],
|
||||
description="Should retrieve NN types via orphan connections (CNN->NN, RNN->NN)",
|
||||
category="precision"
|
||||
),
|
||||
QueryTestCase(
|
||||
query="What quantum computing hardware approaches exist?",
|
||||
expected_entities=["Qubit", "Trapped Ions", "Superconducting Qubits",
|
||||
"Photonic Qubits", "Topological Qubits", "IonQ"],
|
||||
unexpected_entities=["Neural Networks", "Machine Learning", "Climate Change"],
|
||||
description="Should retrieve qubit types via orphan connections",
|
||||
category="precision"
|
||||
),
|
||||
|
||||
# RECALL TESTS - Do we find things we would have MISSED without connections?
|
||||
QueryTestCase(
|
||||
query="What companies are working on quantum computing?",
|
||||
expected_entities=["IonQ", "Microsoft", "Google", "IBM"],
|
||||
unexpected_entities=[],
|
||||
description="Should find IonQ (connected via Trapped Ions) and Microsoft (via Topological Qubits)",
|
||||
category="recall"
|
||||
),
|
||||
QueryTestCase(
|
||||
query="What are greenhouse gases?",
|
||||
expected_entities=["Carbon Dioxide (CO2)", "Methane (CH4)", "Nitrous Oxide (N2O)",
|
||||
"Fluorinated Gases"],
|
||||
unexpected_entities=["Machine Learning", "Quantum Computing"],
|
||||
description="Should retrieve all GHGs via orphan connections forming a cluster",
|
||||
category="recall"
|
||||
),
|
||||
|
||||
# NOISE TESTS - Do we retrieve IRRELEVANT things?
|
||||
QueryTestCase(
|
||||
query="What is reinforcement learning?",
|
||||
expected_entities=["Reinforcement Learning", "Machine Learning"],
|
||||
unexpected_entities=["Climate Change", "FDA", "Vehicle Emissions Standards"],
|
||||
description="Should NOT pull in unrelated domains despite graph connectivity",
|
||||
category="noise"
|
||||
),
|
||||
QueryTestCase(
|
||||
query="How does computer vision work?",
|
||||
expected_entities=["Computer Vision", "Image Segmentation", "Object Tracking",
|
||||
"Feature Extraction", "Edge Detection"],
|
||||
unexpected_entities=["Quantum Computing", "Climate Modeling", "Drug Discovery"],
|
||||
description="Should retrieve CV techniques, not unrelated domains",
|
||||
category="noise"
|
||||
),
|
||||
|
||||
# EDGE CASE - Orphan connections shouldn't create nonsense pathways
|
||||
QueryTestCase(
|
||||
query="What is Amazon?",
|
||||
expected_entities=["Amazon"],
|
||||
unexpected_entities=[], # We connected Amazon -> Microsoft, is this causing issues?
|
||||
description="Amazon query - check if connection to Microsoft causes retrieval issues",
|
||||
category="noise"
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
async def run_query(rag, query: str, mode: str = "local") -> dict:
|
||||
"""Run a query and return retrieved entities."""
|
||||
# This would need to be adapted based on how LightRAG returns context
|
||||
result = await rag.aquery(query, param={"mode": mode})
|
||||
return result
|
||||
|
||||
|
||||
async def evaluate_test_case(rag, test_case: QueryTestCase) -> dict:
|
||||
"""Evaluate a single test case."""
|
||||
result = await run_query(rag, test_case.query)
|
||||
|
||||
# Extract retrieved entities from result
|
||||
# (Implementation depends on LightRAG response format)
|
||||
retrieved_entities = [] # Parse from result
|
||||
|
||||
# Calculate metrics
|
||||
expected_found = [e for e in test_case.expected_entities if e in retrieved_entities]
|
||||
unexpected_found = [e for e in test_case.unexpected_entities if e in retrieved_entities]
|
||||
|
||||
precision = len(expected_found) / len(retrieved_entities) if retrieved_entities else 0
|
||||
recall = len(expected_found) / len(test_case.expected_entities) if test_case.expected_entities else 1
|
||||
noise_rate = len(unexpected_found) / len(retrieved_entities) if retrieved_entities else 0
|
||||
|
||||
return {
|
||||
"test_case": test_case.description,
|
||||
"category": test_case.category,
|
||||
"query": test_case.query,
|
||||
"expected_found": expected_found,
|
||||
"expected_missed": [e for e in test_case.expected_entities if e not in retrieved_entities],
|
||||
"unexpected_found": unexpected_found,
|
||||
"precision": precision,
|
||||
"recall": recall,
|
||||
"noise_rate": noise_rate,
|
||||
"pass": len(unexpected_found) == 0 and recall > 0.5
|
||||
}
|
||||
|
||||
|
||||
async def run_ab_comparison(rag_with_connections, rag_without_connections, query: str) -> dict:
|
||||
"""
|
||||
Compare retrieval results with and without orphan connections.
|
||||
|
||||
This requires two separate LightRAG instances:
|
||||
- One with orphan connections applied
|
||||
- One without (baseline)
|
||||
"""
|
||||
result_with = await run_query(rag_with_connections, query)
|
||||
result_without = await run_query(rag_without_connections, query)
|
||||
|
||||
return {
|
||||
"query": query,
|
||||
"with_connections": result_with,
|
||||
"without_connections": result_without,
|
||||
"improved": None, # Human evaluation needed
|
||||
}
|
||||
|
||||
|
||||
def generate_test_report(results: list[dict]) -> str:
|
||||
"""Generate a test report from evaluation results."""
|
||||
report = ["# Orphan Connection Quality Test Report\n"]
|
||||
|
||||
# Summary by category
|
||||
for category in ["precision", "recall", "noise"]:
|
||||
cat_results = [r for r in results if r["category"] == category]
|
||||
if cat_results:
|
||||
passed = sum(1 for r in cat_results if r["pass"])
|
||||
report.append(f"\n## {category.upper()} Tests: {passed}/{len(cat_results)} passed\n")
|
||||
for r in cat_results:
|
||||
status = "✅" if r["pass"] else "❌"
|
||||
report.append(f"- {status} {r['test_case']}")
|
||||
if r.get("unexpected_found"):
|
||||
report.append(f" - ⚠️ Noise detected: {r['unexpected_found']}")
|
||||
|
||||
# Overall metrics
|
||||
all_precision = [r["precision"] for r in results if r["precision"] is not None]
|
||||
all_recall = [r["recall"] for r in results if r["recall"] is not None]
|
||||
all_noise = [r["noise_rate"] for r in results if r["noise_rate"] is not None]
|
||||
|
||||
report.append(f"\n## Overall Metrics")
|
||||
report.append(f"- Average Precision: {sum(all_precision)/len(all_precision):.2f}")
|
||||
report.append(f"- Average Recall: {sum(all_recall)/len(all_recall):.2f}")
|
||||
report.append(f"- Average Noise Rate: {sum(all_noise)/len(all_noise):.2f}")
|
||||
|
||||
return "\n".join(report)
|
||||
|
||||
|
||||
# Manual evaluation checklist
|
||||
EVALUATION_CHECKLIST = """
|
||||
## Manual Evaluation Checklist
|
||||
|
||||
For each orphan connection, evaluate:
|
||||
|
||||
1. **Semantic Validity** (Is the connection logically correct?)
|
||||
- [ ] The entities are genuinely related
|
||||
- [ ] The relationship type makes sense
|
||||
- [ ] A human expert would agree with this connection
|
||||
|
||||
2. **Retrieval Impact** (Does this help or hurt queries?)
|
||||
- [ ] Queries about entity A now appropriately include entity B
|
||||
- [ ] Queries about entity B now appropriately include entity A
|
||||
- [ ] No unrelated queries are polluted by this connection
|
||||
|
||||
3. **Specificity** (Is the connection too broad?)
|
||||
- [ ] The connection is specific enough to be useful
|
||||
- [ ] Not just "both are technology" or "both are nouns"
|
||||
- [ ] The relationship description is meaningful
|
||||
|
||||
4. **Directionality** (Does the relationship make sense both ways?)
|
||||
- [ ] Query for A -> retrieves B makes sense
|
||||
- [ ] Query for B -> retrieves A makes sense
|
||||
|
||||
## Red Flags to Watch For:
|
||||
- Connections between entirely different domains (e.g., Climate -> Quantum)
|
||||
- Very low similarity scores with high confidence (LLM hallucination?)
|
||||
- Hub entities getting too many connections (becoming noise magnets)
|
||||
- Circular clusters forming (A->B->C->A with no external connections)
|
||||
"""
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
print("Orphan Connection Quality Test Framework")
|
||||
print("=" * 50)
|
||||
print(f"Total test cases: {len(TEST_CASES)}")
|
||||
print(f"- Precision tests: {len([t for t in TEST_CASES if t.category == 'precision'])}")
|
||||
print(f"- Recall tests: {len([t for t in TEST_CASES if t.category == 'recall'])}")
|
||||
print(f"- Noise tests: {len([t for t in TEST_CASES if t.category == 'noise'])}")
|
||||
print("\nRun with a LightRAG instance to execute tests.")
|
||||
print(EVALUATION_CHECKLIST)
|
||||
Loading…
Add table
Reference in a new issue