Merge branch 'main' into add-Memgraph-graph-db

This commit is contained in:
yangdx 2025-07-05 13:14:39 +08:00
commit a567601da2
8 changed files with 144 additions and 116 deletions

View file

@@ -757,6 +757,8 @@ async def initialize_rag():
<details> <details>
<summary> <b>使用Faiss进行存储</b> </summary> <summary> <b>使用Faiss进行存储</b> </summary>
在使用Faiss向量数据库之前必须手工安装`faiss-cpu`或`faiss-gpu`
- 安装所需依赖: - 安装所需依赖:

View file

@@ -819,6 +819,8 @@ For production level scenarios you will most likely want to leverage an enterpri
<details> <details>
<summary> <b>Using Faiss for Storage</b> </summary> <summary> <b>Using Faiss for Storage</b> </summary>
You must manually install faiss-cpu or faiss-gpu before using FAISS vector db.
Manually install `faiss-cpu` or `faiss-gpu` before using FAISS vector db.
- Install the required dependencies: - Install the required dependencies:

View file

@@ -108,11 +108,28 @@ EMBEDDING_BINDING_HOST=http://localhost:11434
# AZURE_EMBEDDING_ENDPOINT=your_endpoint # AZURE_EMBEDDING_ENDPOINT=your_endpoint
# AZURE_EMBEDDING_API_KEY=your_api_key # AZURE_EMBEDDING_API_KEY=your_api_key
###########################
### Data storage selection ### Data storage selection
###########################
### PostgreSQL
# LIGHTRAG_KV_STORAGE=PGKVStorage # LIGHTRAG_KV_STORAGE=PGKVStorage
# LIGHTRAG_VECTOR_STORAGE=PGVectorStorage
# LIGHTRAG_DOC_STATUS_STORAGE=PGDocStatusStorage # LIGHTRAG_DOC_STATUS_STORAGE=PGDocStatusStorage
# LIGHTRAG_VECTOR_STORAGE=PGVectorStorage
# LIGHTRAG_GRAPH_STORAGE=PGGraphStorage
### MongoDB
# LIGHTRAG_KV_STORAGE=MongoKVStorage
# LIGHTRAG_DOC_STATUS_STORAGE=MongoDocStatusStorage
# LIGHTRAG_VECTOR_STORAGE=MongoVectorDBStorage
# LIGHTRAG_GRAPH_STORAGE=MongoGraphStorage
### KV Storage
# LIGHTRAG_KV_STORAGE=RedisKVStorage
# LIGHTRAG_DOC_STATUS_STORAGE=RedisDocStatusStorage
### Vector Storage
# LIGHTRAG_VECTOR_STORAGE=FaissVectorDBStorage
# LIGHTRAG_VECTOR_STORAGE=MilvusVectorDBStorage
### Graph Storage
# LIGHTRAG_GRAPH_STORAGE=Neo4JStorage # LIGHTRAG_GRAPH_STORAGE=Neo4JStorage
# LIGHTRAG_GRAPH_STORAGE=MemgraphStorage
### PostgreSQL Configuration ### PostgreSQL Configuration
POSTGRES_HOST=localhost POSTGRES_HOST=localhost

View file

@@ -4,9 +4,7 @@ import asyncio
from typing import Any, final from typing import Any, final
import json import json
import numpy as np import numpy as np
from dataclasses import dataclass from dataclasses import dataclass
import pipmaster as pm
from lightrag.utils import logger, compute_mdhash_id from lightrag.utils import logger, compute_mdhash_id
from lightrag.base import BaseVectorStorage from lightrag.base import BaseVectorStorage
@@ -17,11 +15,7 @@ from .shared_storage import (
set_all_update_flags, set_all_update_flags,
) )
USE_GPU = os.getenv("FAISS_USE_GPU", "0") == "1" # You must manually install faiss-cpu or faiss-gpu before using FAISS vector db
FAISS_PACKAGE = "faiss-gpu" if USE_GPU else "faiss-cpu"
if not pm.is_installed(FAISS_PACKAGE):
pm.install(FAISS_PACKAGE)
import faiss # type: ignore import faiss # type: ignore
@@ -165,7 +159,7 @@ class FaissVectorDBStorage(BaseVectorStorage):
meta["__vector__"] = embeddings[i].tolist() meta["__vector__"] = embeddings[i].tolist()
self._id_to_meta.update({fid: meta}) self._id_to_meta.update({fid: meta})
logger.info(f"Upserted {len(list_data)} vectors into Faiss index.") logger.debug(f"Upserted {len(list_data)} vectors into Faiss index.")
return [m["__id__"] for m in list_data] return [m["__id__"] for m in list_data]
async def query( async def query(
@@ -228,7 +222,7 @@ class FaissVectorDBStorage(BaseVectorStorage):
2. Only one process should updating the storage at a time before index_done_callback, 2. Only one process should updating the storage at a time before index_done_callback,
KG-storage-log should be used to avoid data corruption KG-storage-log should be used to avoid data corruption
""" """
logger.info(f"Deleting {len(ids)} vectors from {self.namespace}") logger.debug(f"Deleting {len(ids)} vectors from {self.namespace}")
to_remove = [] to_remove = []
for cid in ids: for cid in ids:
fid = self._find_faiss_id_by_custom_id(cid) fid = self._find_faiss_id_by_custom_id(cid)
@@ -330,7 +324,7 @@ class FaissVectorDBStorage(BaseVectorStorage):
and rebuild in-memory structures so we can query. and rebuild in-memory structures so we can query.
""" """
if not os.path.exists(self._faiss_index_file): if not os.path.exists(self._faiss_index_file):
logger.warning("No existing Faiss index file found. Starting fresh.") logger.warning(f"No existing Faiss index file found for {self.namespace}")
return return
try: try:

View file

@@ -168,6 +168,13 @@ async def _handle_single_entity_extraction(
# Normalize entity name # Normalize entity name
entity_name = normalize_extracted_info(entity_name, is_entity=True) entity_name = normalize_extracted_info(entity_name, is_entity=True)
# Check if entity name became empty after normalization
if not entity_name or not entity_name.strip():
logger.warning(
f"Entity extraction error: entity name became empty after normalization. Original: '{record_attributes[1]}'"
)
return None
# Clean and validate entity type # Clean and validate entity type
entity_type = clean_str(record_attributes[2]).strip('"') entity_type = clean_str(record_attributes[2]).strip('"')
if not entity_type.strip() or entity_type.startswith('("'): if not entity_type.strip() or entity_type.startswith('("'):
@@ -209,6 +216,20 @@ async def _handle_single_relationship_extraction(
# Normalize source and target entity names # Normalize source and target entity names
source = normalize_extracted_info(source, is_entity=True) source = normalize_extracted_info(source, is_entity=True)
target = normalize_extracted_info(target, is_entity=True) target = normalize_extracted_info(target, is_entity=True)
# Check if source or target became empty after normalization
if not source or not source.strip():
logger.warning(
f"Relationship extraction error: source entity became empty after normalization. Original: '{record_attributes[1]}'"
)
return None
if not target or not target.strip():
logger.warning(
f"Relationship extraction error: target entity became empty after normalization. Original: '{record_attributes[2]}'"
)
return None
if source == target: if source == target:
logger.debug( logger.debug(
f"Relationship source and target are the same in: {record_attributes}" f"Relationship source and target are the same in: {record_attributes}"

93
pyproject.toml Normal file
View file

@@ -0,0 +1,93 @@
[build-system]
requires = ["setuptools>=64", "wheel"]
build-backend = "setuptools.build_meta"
[project]
name = "lightrag-hku"
dynamic = ["version"]
authors = [
{name = "Zirui Guo"}
]
description = "LightRAG: Simple and Fast Retrieval-Augmented Generation"
readme = "README.md"
license = {text = "MIT"}
requires-python = ">=3.9"
classifiers = [
"Development Status :: 4 - Beta",
"Programming Language :: Python :: 3",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
"Intended Audience :: Developers",
"Topic :: Software Development :: Libraries :: Python Modules",
]
dependencies = [
"aiohttp",
"configparser",
"dotenv",
"future",
"numpy",
"pandas>=2.0.0",
"pipmaster",
"pydantic",
"python-dotenv",
"pyuca",
"setuptools",
"tenacity",
"tiktoken",
"xlsxwriter>=3.1.0",
]
[project.optional-dependencies]
api = [
# Core dependencies
"aiohttp",
"configparser",
"dotenv",
"future",
"numpy",
"openai",
"pandas>=2.0.0",
"pipmaster",
"pydantic",
"python-dotenv",
"pyuca",
"setuptools",
"tenacity",
"tiktoken",
"xlsxwriter>=3.1.0",
# API-specific dependencies
"aiofiles",
"ascii_colors",
"asyncpg",
"distro",
"fastapi",
"httpcore",
"httpx",
"jiter",
"passlib[bcrypt]",
"PyJWT",
"python-jose[cryptography]",
"python-multipart",
"pytz",
"uvicorn",
]
[project.scripts]
lightrag-server = "lightrag.api.lightrag_server:main"
lightrag-gunicorn = "lightrag.api.run_with_gunicorn:main"
[project.urls]
Homepage = "https://github.com/HKUDS/LightRAG"
Documentation = "https://github.com/HKUDS/LightRAG"
Repository = "https://github.com/HKUDS/LightRAG"
"Bug Tracker" = "https://github.com/HKUDS/LightRAG/issues"
[tool.setuptools]
packages = ["lightrag"]
include-package-data = true
[tool.setuptools.dynamic]
version = {attr = "lightrag.__version__"}
[tool.setuptools.package-data]
lightrag = ["api/webui/**/*"]

109
setup.py
View file

@@ -1,107 +1,6 @@
import setuptools # Minimal setup.py for backward compatibility
from pathlib import Path # Primary configuration is now in pyproject.toml
from setuptools import setup
# Reading the long description from README.md setup()
def read_long_description():
try:
return Path("README.md").read_text(encoding="utf-8")
except FileNotFoundError:
return "A description of LightRAG is currently unavailable."
# Retrieving metadata from __init__.py
def retrieve_metadata():
vars2find = ["__author__", "__version__", "__url__"]
vars2readme = {}
try:
with open("./lightrag/__init__.py") as f:
for line in f.readlines():
for v in vars2find:
if line.startswith(v):
line = (
line.replace(" ", "")
.replace('"', "")
.replace("'", "")
.strip()
)
vars2readme[v] = line.split("=")[1]
except FileNotFoundError:
raise FileNotFoundError("Metadata file './lightrag/__init__.py' not found.")
# Checking if all required variables are found
missing_vars = [v for v in vars2find if v not in vars2readme]
if missing_vars:
raise ValueError(
f"Missing required metadata variables in __init__.py: {missing_vars}"
)
return vars2readme
# Reading dependencies from requirements.txt
def read_requirements(file_path="requirements.txt"):
deps = []
try:
with open(file_path) as f:
deps = [
line.strip() for line in f if line.strip() and not line.startswith("#")
]
except FileNotFoundError:
print(f"Warning: '{file_path}' not found. No dependencies will be installed.")
return deps
def read_api_requirements():
return read_requirements("lightrag/api/requirements.txt")
def read_extra_requirements():
return read_requirements("lightrag/tools/lightrag_visualizer/requirements.txt")
metadata = retrieve_metadata()
long_description = read_long_description()
requirements = read_requirements()
setuptools.setup(
name="lightrag-hku",
url=metadata["__url__"],
version=metadata["__version__"],
author=metadata["__author__"],
description="LightRAG: Simple and Fast Retrieval-Augmented Generation",
long_description=long_description,
long_description_content_type="text/markdown",
packages=setuptools.find_packages(
exclude=("tests*", "docs*")
), # Automatically find packages
classifiers=[
"Development Status :: 4 - Beta",
"Programming Language :: Python :: 3",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
"Intended Audience :: Developers",
"Topic :: Software Development :: Libraries :: Python Modules",
],
python_requires=">=3.9",
install_requires=requirements,
include_package_data=True, # Includes non-code files from MANIFEST.in
project_urls={ # Additional project metadata
"Documentation": metadata.get("__url__", ""),
"Source": metadata.get("__url__", ""),
"Tracker": f"{metadata.get('__url__', '')}/issues"
if metadata.get("__url__")
else "",
},
extras_require={
"api": requirements + read_api_requirements(),
"tools": read_extra_requirements(), # API requirements as optional
},
entry_points={
"console_scripts": [
"lightrag-server=lightrag.api.lightrag_server:main [api]",
"lightrag-gunicorn=lightrag.api.run_with_gunicorn:main [api]",
"lightrag-viewer=lightrag.tools.lightrag_visualizer.graph_visualizer:main [tools]",
],
},
)