minor simplifications

This commit is contained in:
Alexander Belikov 2025-11-13 18:05:33 +01:00
parent 3f33d30c33
commit cdbb0b0826
2 changed files with 36 additions and 47 deletions

View file

@ -10,7 +10,7 @@ from lightrag.llm.openai import gpt_4o_mini_complete, openai_embed
from lightrag.kg.shared_storage import initialize_pipeline_status
from lightrag.utils import logger, set_verbose_debug
WORKING_DIR = "./tigergraph_test_dir"
WORKING_DIR = "./dickens"
if not os.path.exists(WORKING_DIR):
os.mkdir(WORKING_DIR)
@ -159,32 +159,35 @@ async def test_ingestion(json_file=None):
print(f" ✓ Document inserted with track_id: {track_id}")
# Test JSON ingestion if JSON file is provided or exists
json_test_file = Path(json_file) if json_file else Path("test_data.json")
if json_test_file.exists():
print("\n" + "=" * 60)
print("Ingesting JSON file...")
print("=" * 60)
if json_file:
json_test_file = Path(json_file)
if json_test_file.exists():
print("\n" + "=" * 60)
print("Ingesting JSON file...")
print("=" * 60)
try:
texts = load_json_texts(json_test_file)
print(f"✓ Loaded {len(texts)} texts from {json_test_file}")
try:
texts = load_json_texts(json_test_file)
print(f"✓ Loaded {len(texts)} texts from {json_test_file}")
for i, text in enumerate(texts, 1):
print(f"\n[{i}/{len(texts)}] Inserting from JSON...")
track_id = await rag.ainsert(input=text, file_paths=str(json_test_file))
print(f" ✓ Text inserted with track_id: {track_id}")
except Exception as e:
print(f"✗ Error loading JSON file: {e}")
import traceback
for i, text in enumerate(texts, 1):
print(f"\n[{i}/{len(texts)}] Inserting from JSON...")
track_id = await rag.ainsert(
input=text, file_paths=str(json_test_file)
)
print(f" ✓ Text inserted with track_id: {track_id}")
except Exception as e:
print(f"✗ Error loading JSON file: {e}")
import traceback
traceback.print_exc()
else:
print(
f"\n No JSON file found at {json_test_file} (skipping JSON ingestion test)"
)
print(" Create a test_data.json file with format:")
print(' [{"text": "Your text here"}, {"text": "Another text"}]')
print(" Or use --json-file parameter to specify a JSON file")
traceback.print_exc()
else:
print(
f"\n No JSON file found at {json_test_file} (skipping JSON ingestion test)"
)
print(" Create a test_data.json file with format:")
print(' [{"text": "Your text here"}, {"text": "Another text"}]')
print(" Or use --json-file parameter to specify a JSON file")
print("\n" + "=" * 60)
print("Verifying ingestion...")

View file

@ -1213,25 +1213,11 @@ class TigerGraphStorage(BaseGraphStorage):
if "entity_id" not in node_data_copy:
node_data_copy["entity_id"] = node_id
# Ensure labels SET includes workspace and entity_type
entity_type = node_data_copy.get("entity_type", "UNKNOWN")
if "labels" not in node_data_copy:
# Create labels set with workspace and entity_type
labels_set = {workspace_label, entity_type}
else:
# Ensure labels is a set and includes workspace and entity_type
if isinstance(node_data_copy["labels"], (list, tuple)):
labels_set = set(node_data_copy["labels"])
elif isinstance(node_data_copy["labels"], set):
labels_set = node_data_copy["labels"].copy()
else:
labels_set = {str(node_data_copy["labels"])}
# Add workspace and entity_type to labels
labels_set.add(workspace_label)
labels_set.add(entity_type)
# Convert set to list for JSON serialization (TigerGraph REST API expects list for SET<STRING>)
node_data_copy["labels"] = list(labels_set)
# Ensure labels SET includes ONLY workspace (for filtering/isolation)
# entity_type should NOT be in labels - it's stored in entity_type property
# Always set labels to contain only workspace, regardless of what's in node_data
# This ensures entity_type never seeps into labels, even if it was there before
node_data_copy["labels"] = [workspace_label]
# Upsert vertex
self._conn.upsertVertex(
@ -1284,8 +1270,8 @@ class TigerGraphStorage(BaseGraphStorage):
{
"entity_id": source_node_id,
"labels": list(
{workspace_label, "UNKNOWN"}
), # Convert to list for JSON
{workspace_label}
), # Only workspace in labels
"entity_type": "UNKNOWN",
},
)
@ -1315,8 +1301,8 @@ class TigerGraphStorage(BaseGraphStorage):
{
"entity_id": target_node_id,
"labels": list(
{workspace_label, "UNKNOWN"}
), # Convert to list for JSON
{workspace_label}
), # Only workspace in labels
"entity_type": "UNKNOWN",
},
)