Merge 4945faf021 into 9562a974d2

2025-12-12 10:42:23 +08:00 · 2025-12-12 10:42:23 +08:00 · 33bde4161a
commit 33bde4161a
parent 9562a974d2 4945faf021
10 changed files with 3052 additions and 1132 deletions
--- a/README.md
+++ b/README.md
@ -292,7 +292,7 @@ A full list of LightRAG init parameters:
 | **workspace** | str | Workspace name for data isolation between different LightRAG Instances |  |
 | **kv_storage** | `str` | Storage type for documents and text chunks. Supported types: `JsonKVStorage`,`PGKVStorage`,`RedisKVStorage`,`MongoKVStorage` | `JsonKVStorage` |
 | **vector_storage** | `str` | Storage type for embedding vectors. Supported types: `NanoVectorDBStorage`,`PGVectorStorage`,`MilvusVectorDBStorage`,`ChromaVectorDBStorage`,`FaissVectorDBStorage`,`MongoVectorDBStorage`,`QdrantVectorDBStorage` | `NanoVectorDBStorage` |
-| **graph_storage** | `str` | Storage type for graph edges and nodes. Supported types: `NetworkXStorage`,`Neo4JStorage`,`PGGraphStorage`,`AGEStorage` | `NetworkXStorage` |
+| **graph_storage** | `str` | Storage type for graph edges and nodes. Supported types: `NetworkXStorage`,`Neo4JStorage`,`PGGraphStorage`,`MemgraphStorage`,`TigerGraphStorage` | `NetworkXStorage` |
 | **doc_status_storage** | `str` | Storage type for documents process status. Supported types: `JsonDocStatusStorage`,`PGDocStatusStorage`,`MongoDocStatusStorage` | `JsonDocStatusStorage` |
 | **chunk_token_size** | `int` | Maximum token size per chunk when splitting documents | `1200` |
 | **chunk_overlap_token_size** | `int` | Overlap token size between two chunks when splitting documents | `100` |
@ -791,7 +791,8 @@ MongoKVStorage   MongoDB
 NetworkXStorage      NetworkX (default)
 Neo4JStorage         Neo4J
 PGGraphStorage       PostgreSQL with AGE plugin
-MemgraphStorage.     Memgraph
+MemgraphStorage      Memgraph
+TigerGraphStorage    TigerGraph
 ```

 > Testing has shown that Neo4J delivers superior performance in production environments compared to PostgreSQL with AGE plugin.
@ -936,6 +937,44 @@ async def initialize_rag():

 </details>

+<details>
+<summary> <b>Using TigerGraph for Storage</b> </summary>
+
+* TigerGraph is a high-performance, distributed graph database with a native GSQL query language.
+* You can run TigerGraph locally using Docker for easy testing.
+* See: https://www.tigergraph.com/developer/
+
+```python
+export TIGERGRAPH_URI="http://localhost:9000"
+export TIGERGRAPH_USERNAME="tigergraph"
+export TIGERGRAPH_PASSWORD="tigergraph"
+export TIGERGRAPH_GRAPH_NAME="lightrag_graph"
+
+# Setup logger for LightRAG
+setup_logger("lightrag", level="INFO")
+
+# When you launch the project, override the default KG: NetworkX
+# by specifying graph_storage="TigerGraphStorage".
+
+# Note: Default settings use NetworkX
+# Initialize LightRAG with TigerGraph implementation.
+async def initialize_rag():
+    rag = LightRAG(
+        working_dir=WORKING_DIR,
+        llm_model_func=gpt_4o_mini_complete,  # Use gpt_4o_mini_complete LLM model
+        graph_storage="TigerGraphStorage", #<-----------override KG default
+    )
+
+    # Initialize database connections
+    await rag.initialize_storages()
+    # Initialize pipeline status for document processing
+    await initialize_pipeline_status()
+
+    return rag
+```
+
+</details>
+
 <details>
 <summary> <b>Using MongoDB Storage</b> </summary>

--- a/config.ini.example
+++ b/config.ini.example
@ -38,3 +38,9 @@ vchordrq_epsilon = 1.9

 [memgraph]
 uri = bolt://localhost:7687
+
+[tigergraph]
+uri = http://localhost:9000
+username = tigergraph
+password = your_password
+graph_name = lightrag
--- a/env.example
+++ b/env.example
@ -412,6 +412,11 @@ NEO4J_KEEP_ALIVE=true
 ### DB specific workspace should not be set, keep for compatible only
 ### NEO4J_WORKSPACE=forced_workspace_name

+### TigerGraph Configuration
+TIGERGRAPH_URI=http://localhost:9000
+TIGERGRAPH_USERNAME=tigergraph
+TIGERGRAPH_PASSWORD=tigergraph
+
 ### MongoDB Configuration
 MONGO_URI=mongodb://root:root@localhost:27017/
 #MONGO_URI=mongodb+srv://xxxx
@ -461,6 +466,13 @@ MEMGRAPH_DATABASE=memgraph
 # LANGFUSE_HOST="https://cloud.langfuse.com"  # 或您的自托管实例地址
 # LANGFUSE_ENABLE_TRACE=true

+### TigerGraph Configuration
+TIGERGRAPH_URI=http://localhost:9000
+TIGERGRAPH_USERNAME=tigergraph
+TIGERGRAPH_PASSWORD='your_password'
+TIGERGRAPH_GRAPH_NAME=lightrag
+# TIGERGRAPH_WORKSPACE=forced_workspace_name
+
 ############################
 ### Evaluation Configuration
 ############################
--- a/examples/lightrag_tigergraph_demo.py
+++ b/examples/lightrag_tigergraph_demo.py
@ -0,0 +1,246 @@
+import os
+import asyncio
+import argparse
+import logging
+import logging.config
+import json
+from pathlib import Path
+from lightrag import LightRAG
+from lightrag.llm.openai import gpt_4o_mini_complete, openai_embed
+from lightrag.kg.shared_storage import initialize_pipeline_status
+from lightrag.utils import logger, set_verbose_debug
+
+WORKING_DIR = "./dickens"
+# exist_ok=True avoids the exists-then-mkdir race and creates missing parents
+os.makedirs(WORKING_DIR, exist_ok=True)
+
+
+def configure_logging():
+    """Set up console and rotating-file logging for the TigerGraph demo.
+
+    The log file location, rotation size, and backup count come from the
+    LOG_DIR, LOG_MAX_BYTES, and LOG_BACKUP_COUNT environment variables.
+    """
+    # Drop any handlers and filters already attached to these loggers
+    for name in ("uvicorn", "uvicorn.access", "uvicorn.error", "lightrag"):
+        stale_logger = logging.getLogger(name)
+        stale_logger.handlers = []
+        stale_logger.filters = []
+
+    # The log file lives in LOG_DIR, falling back to the current directory
+    log_dir = os.getenv("LOG_DIR", os.getcwd())
+    log_file_path = os.path.abspath(
+        os.path.join(log_dir, "lightrag_tigergraph_demo.log")
+    )
+    print(f"\nLightRAG TigerGraph demo log file: {log_file_path}\n")
+    os.makedirs(os.path.dirname(log_file_path), exist_ok=True)
+
+    # Rotation defaults: 10MB per file, 5 backups
+    file_handler = {
+        "formatter": "detailed",
+        "class": "logging.handlers.RotatingFileHandler",
+        "filename": log_file_path,
+        "maxBytes": int(os.getenv("LOG_MAX_BYTES", 10485760)),
+        "backupCount": int(os.getenv("LOG_BACKUP_COUNT", 5)),
+        "encoding": "utf-8",
+    }
+    console_handler = {
+        "formatter": "default",
+        "class": "logging.StreamHandler",
+        "stream": "ext://sys.stderr",
+    }
+    formatters = {
+        "default": {"format": "%(levelname)s: %(message)s"},
+        "detailed": {
+            "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+        },
+    }
+    lightrag_logger = {
+        "handlers": ["console", "file"],
+        "level": "INFO",
+        "propagate": False,
+    }
+
+    logging.config.dictConfig(
+        {
+            "version": 1,
+            "disable_existing_loggers": False,
+            "formatters": formatters,
+            "handlers": {
+                "console": console_handler,
+                "file": file_handler,
+            },
+            "loggers": {"lightrag": lightrag_logger},
+        }
+    )
+
+    # Set the logger imported from lightrag.utils to INFO
+    logger.setLevel(logging.INFO)
+    # VERBOSE_DEBUG=true (case-insensitive) is passed on to set_verbose_debug
+    set_verbose_debug(os.getenv("VERBOSE_DEBUG", "false").lower() == "true")
+
+
+def load_json_texts(json_path: str | Path) -> list[str]:
+    """
+    Load texts from a plain JSON file.
+
+    Expects JSON array format: [{"text": "..."}, {"text": "..."}]
+
+    Args:
+        json_path: Path to JSON file
+
+    Returns:
+        List of text strings extracted from "text" field, in file order
+
+    Raises:
+        FileNotFoundError: If json_path does not exist
+        ValueError: If the top level is not an array, or an item is not an
+            object whose "text" field is a string
+    """
+    json_path = Path(json_path)
+    if not json_path.exists():
+        raise FileNotFoundError(f"JSON file not found: {json_path}")
+
+    with open(json_path, "r", encoding="utf-8") as f:
+        data = json.load(f)
+    if not isinstance(data, list):
+        raise ValueError(f"Expected JSON array, got {type(data).__name__}")
+
+    for index, item in enumerate(data):
+        if not isinstance(item, dict) or not isinstance(item.get("text"), str):
+            raise ValueError(
+                f"Expected object with string 'text' field at index {index}, "
+                f"got {type(item).__name__}"
+            )
+    return [item["text"] for item in data]
+
+
+async def initialize_rag():
+    """Build a LightRAG instance that uses TigerGraphStorage and initialize it."""
+    rag_options = dict(
+        working_dir=WORKING_DIR,
+        llm_model_func=gpt_4o_mini_complete,  # Use gpt_4o_mini_complete LLM model
+        embedding_func=openai_embed,  # Use OpenAI embedding function
+        graph_storage="TigerGraphStorage",
+    )
+    rag = LightRAG(**rag_options)
+
+    # Storage connections first, then the document-processing pipeline status
+    await rag.initialize_storages()
+    await initialize_pipeline_status()
+
+    return rag
+
+
+async def test_ingestion(json_file=None):
+    """Ingest sample documents into TigerGraph, then verify and query the graph.
+
+    Args:
+        json_file: Optional path to a JSON file of texts to ingest as well
+            (format: [{"text": "..."}, ...]). Skipped when falsy or missing.
+    """
+    import traceback
+
+    print("=" * 60)
+    print("Initializing LightRAG with TigerGraph...")
+    print("=" * 60)
+
+    rag = await initialize_rag()
+    print(f"✓ LightRAG initialized: {type(rag)}")
+
+    test_documents = [
+        "TigerGraph is a graph database platform designed for enterprise-scale graph analytics. It supports distributed graph processing and real-time queries.",
+        "LightRAG is a framework that combines retrieval-augmented generation with knowledge graphs. It uses graph storage backends like TigerGraph, Neo4j, and Memgraph.",
+        "Graph databases store data as nodes and edges, making them ideal for relationship-heavy data. They excel at traversing complex connections between entities.",
+    ]
+
+    try:
+        print("\n" + "=" * 60)
+        print("Ingesting test documents...")
+        print("=" * 60)
+
+        for i, doc in enumerate(test_documents, 1):
+            print(f"\n[{i}/{len(test_documents)}] Inserting document...")
+            track_id = await rag.ainsert(input=doc, file_paths=f"test_doc_{i}.txt")
+            print(f"  ✓ Document inserted with track_id: {track_id}")
+
+        # Ingest the JSON file only when a path was given and the file exists
+        if json_file:
+            json_test_file = Path(json_file)
+            if json_test_file.exists():
+                print("\n" + "=" * 60)
+                print("Ingesting JSON file...")
+                print("=" * 60)
+
+                try:
+                    texts = load_json_texts(json_test_file)
+                    print(f"✓ Loaded {len(texts)} texts from {json_test_file}")
+
+                    for i, text in enumerate(texts, 1):
+                        print(f"\n[{i}/{len(texts)}] Inserting from JSON...")
+                        track_id = await rag.ainsert(
+                            input=text, file_paths=str(json_test_file)
+                        )
+                        print(f"  ✓ Text inserted with track_id: {track_id}")
+                except Exception as e:
+                    # Best-effort: report the failure and continue to verification
+                    print(f"✗ Error loading JSON file: {e}")
+                    traceback.print_exc()
+            else:
+                print(
+                    f"\nℹ No JSON file found at {json_test_file} (skipping JSON ingestion test)"
+                )
+                print("  Create a test_data.json file with format:")
+                print('  [{"text": "Your text here"}, {"text": "Another text"}]')
+                print("  Or use --json-file parameter to specify a JSON file")
+
+        print("\n" + "=" * 60)
+        print("Verifying ingestion...")
+        print("=" * 60)
+
+        try:
+            all_labels = await rag.chunk_entity_relation_graph.get_all_labels()
+            print(f"\n✓ Found {len(all_labels)} entities in the graph")
+            if all_labels:
+                print(f"  Sample entities: {all_labels[:5]}")
+
+            all_nodes = await rag.chunk_entity_relation_graph.get_all_nodes()
+            print(f"✓ Found {len(all_nodes)} nodes in the graph")
+
+            all_edges = await rag.chunk_entity_relation_graph.get_all_edges()
+            print(f"✓ Found {len(all_edges)} edges in the graph")
+
+            print("\n" + "=" * 60)
+            print("Testing query...")
+            print("=" * 60)
+            response = await rag.aquery("What is TigerGraph?")
+            print("\nQuery: 'What is TigerGraph?'")
+            print(f"Response: {response}")
+        except Exception as e:
+            print(f"\n✗ Error during verification: {e}")
+            traceback.print_exc()
+    finally:
+        # Release storage connections even when ingestion or verification fails
+        await rag.finalize_storages()
+
+    print("\n" + "=" * 60)
+    print("Ingestion test completed!")
+    print("=" * 60)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="LightRAG TigerGraph demo",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    parser.add_argument(
+        "--json-file",
+        type=str,
+        default=None,
+        help='Path to JSON file with texts to ingest (format: [{"text": "..."}, ...]). JSON ingestion is skipped if not specified.',
+    )
+    args = parser.parse_args()
+
+    # Logging must be configured before the demo coroutine starts
+    configure_logging()
+    asyncio.run(test_ingestion(json_file=args.json_file))
--- a/lightrag/kg/init.py
+++ b/lightrag/kg/init.py
@ -15,6 +15,7 @@ STORAGE_IMPLEMENTATIONS = {
            "PGGraphStorage",
            "MongoGraphStorage",
            "MemgraphStorage",
+            "TigerGraphStorage",
        ],
        "required_methods": ["upsert_node", "upsert_edge"],
    },
@ -59,6 +60,11 @@ STORAGE_ENV_REQUIREMENTS: dict[str, list[str]] = {
        "MONGO_DATABASE",
    ],
    "MemgraphStorage": ["MEMGRAPH_URI"],
+    "TigerGraphStorage": [
+        "TIGERGRAPH_URI",
+        "TIGERGRAPH_USERNAME",
+        "TIGERGRAPH_PASSWORD",
+    ],
    "AGEStorage": [
        "AGE_POSTGRES_DB",
        "AGE_POSTGRES_USER",
@ -116,6 +122,7 @@ STORAGES = {
    "FaissVectorDBStorage": ".kg.faiss_impl",
    "QdrantVectorDBStorage": ".kg.qdrant_impl",
    "MemgraphStorage": ".kg.memgraph_impl",
+    "TigerGraphStorage": ".kg.tigergraph_impl",
 }


--- a/lightrag/kg/tigergraph_impl.py
+++ b/lightrag/kg/tigergraph_impl.py
--- a/pyproject.toml
+++ b/pyproject.toml
@ -109,6 +109,7 @@ offline-storage = [
    "pymilvus>=2.6.2,<3.0.0",
    "pymongo>=4.0.0,<5.0.0",
    "asyncpg>=0.29.0,<1.0.0",
+    "pyTigerGraph>=1.9.0,<2.0.0",
    "qdrant-client>=1.11.0,<2.0.0",
 ]

--- a/requirements-offline-storage.txt
+++ b/requirements-offline-storage.txt
@ -12,5 +12,6 @@ asyncpg>=0.29.0,<1.0.0
 neo4j>=5.0.0,<7.0.0
 pymilvus>=2.6.2,<3.0.0
 pymongo>=4.0.0,<5.0.0
+pyTigerGraph>=1.9.0,<2.0.0
 qdrant-client>=1.11.0,<2.0.0
 redis>=5.0.0,<8.0.0
--- a/tests/test_graph_storage.py
+++ b/tests/test_graph_storage.py
@ -11,6 +11,7 @@ Supported graph storage types include:
 - MongoDBStorage
 - PGGraphStorage
 - MemgraphStorage
+- TigerGraphStorage
 """

 import asyncio
--- a/uv.lock
+++ b/uv.lock