minor simplifications
This commit is contained in:
parent
3f33d30c33
commit
cdbb0b0826
2 changed files with 36 additions and 47 deletions
|
|
@ -10,7 +10,7 @@ from lightrag.llm.openai import gpt_4o_mini_complete, openai_embed
|
|||
from lightrag.kg.shared_storage import initialize_pipeline_status
|
||||
from lightrag.utils import logger, set_verbose_debug
|
||||
|
||||
WORKING_DIR = "./tigergraph_test_dir"
|
||||
WORKING_DIR = "./dickens"
|
||||
if not os.path.exists(WORKING_DIR):
|
||||
os.mkdir(WORKING_DIR)
|
||||
|
||||
|
|
@ -159,32 +159,35 @@ async def test_ingestion(json_file=None):
|
|||
print(f" ✓ Document inserted with track_id: {track_id}")
|
||||
|
||||
# Test JSON ingestion only when a JSON file is provided and exists
|
||||
json_test_file = Path(json_file) if json_file else Path("test_data.json")
|
||||
if json_test_file.exists():
|
||||
print("\n" + "=" * 60)
|
||||
print("Ingesting JSON file...")
|
||||
print("=" * 60)
|
||||
if json_file:
|
||||
json_test_file = Path(json_file)
|
||||
if json_test_file.exists():
|
||||
print("\n" + "=" * 60)
|
||||
print("Ingesting JSON file...")
|
||||
print("=" * 60)
|
||||
|
||||
try:
|
||||
texts = load_json_texts(json_test_file)
|
||||
print(f"✓ Loaded {len(texts)} texts from {json_test_file}")
|
||||
try:
|
||||
texts = load_json_texts(json_test_file)
|
||||
print(f"✓ Loaded {len(texts)} texts from {json_test_file}")
|
||||
|
||||
for i, text in enumerate(texts, 1):
|
||||
print(f"\n[{i}/{len(texts)}] Inserting from JSON...")
|
||||
track_id = await rag.ainsert(input=text, file_paths=str(json_test_file))
|
||||
print(f" ✓ Text inserted with track_id: {track_id}")
|
||||
except Exception as e:
|
||||
print(f"✗ Error loading JSON file: {e}")
|
||||
import traceback
|
||||
for i, text in enumerate(texts, 1):
|
||||
print(f"\n[{i}/{len(texts)}] Inserting from JSON...")
|
||||
track_id = await rag.ainsert(
|
||||
input=text, file_paths=str(json_test_file)
|
||||
)
|
||||
print(f" ✓ Text inserted with track_id: {track_id}")
|
||||
except Exception as e:
|
||||
print(f"✗ Error loading JSON file: {e}")
|
||||
import traceback
|
||||
|
||||
traceback.print_exc()
|
||||
else:
|
||||
print(
|
||||
f"\nℹ No JSON file found at {json_test_file} (skipping JSON ingestion test)"
|
||||
)
|
||||
print(" Create a test_data.json file with format:")
|
||||
print(' [{"text": "Your text here"}, {"text": "Another text"}]')
|
||||
print(" Or use --json-file parameter to specify a JSON file")
|
||||
traceback.print_exc()
|
||||
else:
|
||||
print(
|
||||
f"\nℹ No JSON file found at {json_test_file} (skipping JSON ingestion test)"
|
||||
)
|
||||
print(" Create a test_data.json file with format:")
|
||||
print(' [{"text": "Your text here"}, {"text": "Another text"}]')
|
||||
print(" Or use --json-file parameter to specify a JSON file")
|
||||
|
||||
print("\n" + "=" * 60)
|
||||
print("Verifying ingestion...")
|
||||
|
|
|
|||
|
|
@ -1213,25 +1213,11 @@ class TigerGraphStorage(BaseGraphStorage):
|
|||
if "entity_id" not in node_data_copy:
|
||||
node_data_copy["entity_id"] = node_id
|
||||
|
||||
# Ensure labels SET includes workspace and entity_type
|
||||
entity_type = node_data_copy.get("entity_type", "UNKNOWN")
|
||||
if "labels" not in node_data_copy:
|
||||
# Create labels set with workspace and entity_type
|
||||
labels_set = {workspace_label, entity_type}
|
||||
else:
|
||||
# Ensure labels is a set and includes workspace and entity_type
|
||||
if isinstance(node_data_copy["labels"], (list, tuple)):
|
||||
labels_set = set(node_data_copy["labels"])
|
||||
elif isinstance(node_data_copy["labels"], set):
|
||||
labels_set = node_data_copy["labels"].copy()
|
||||
else:
|
||||
labels_set = {str(node_data_copy["labels"])}
|
||||
# Add workspace and entity_type to labels
|
||||
labels_set.add(workspace_label)
|
||||
labels_set.add(entity_type)
|
||||
|
||||
# Convert set to list for JSON serialization (TigerGraph REST API expects list for SET<STRING>)
|
||||
node_data_copy["labels"] = list(labels_set)
|
||||
# Ensure labels SET includes ONLY workspace (for filtering/isolation)
|
||||
# entity_type should NOT be in labels - it's stored in entity_type property
|
||||
# Always set labels to contain only workspace, regardless of what's in node_data
|
||||
# This ensures entity_type never seeps into labels, even if it was there before
|
||||
node_data_copy["labels"] = [workspace_label]
|
||||
|
||||
# Upsert vertex
|
||||
self._conn.upsertVertex(
|
||||
|
|
@ -1284,8 +1270,8 @@ class TigerGraphStorage(BaseGraphStorage):
|
|||
{
|
||||
"entity_id": source_node_id,
|
||||
"labels": list(
|
||||
{workspace_label, "UNKNOWN"}
|
||||
), # Convert to list for JSON
|
||||
{workspace_label}
|
||||
), # Only workspace in labels
|
||||
"entity_type": "UNKNOWN",
|
||||
},
|
||||
)
|
||||
|
|
@ -1315,8 +1301,8 @@ class TigerGraphStorage(BaseGraphStorage):
|
|||
{
|
||||
"entity_id": target_node_id,
|
||||
"labels": list(
|
||||
{workspace_label, "UNKNOWN"}
|
||||
), # Convert to list for JSON
|
||||
{workspace_label}
|
||||
), # Only workspace in labels
|
||||
"entity_type": "UNKNOWN",
|
||||
},
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue