Merge branch 'main' into add-Memgraph-graph-db

commit a567601da2 by yangdx, 2025-07-05 13:14:39 +08:00
8 changed files with 144 additions and 116 deletions


@@ -757,6 +757,8 @@ async def initialize_rag():
<details>
<summary> <b>Using Faiss for Storage</b> </summary>
You must manually install `faiss-cpu` or `faiss-gpu` before using the Faiss vector database.
- Install the required dependencies:


@@ -819,6 +819,8 @@ For production level scenarios you will most likely want to leverage an enterprise
<details>
<summary> <b>Using Faiss for Storage</b> </summary>
You must manually install `faiss-cpu` or `faiss-gpu` before using the FAISS vector db.
- Install the required dependencies:
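
A minimal pre-flight sketch of what this manual-install requirement means in practice. The package names come from the note above; the wording of the error message is an assumption, not LightRAG's own:

```python
# Fail fast with an actionable message if neither Faiss build is present.
# Install beforehand with `pip install faiss-cpu` (or `pip install faiss-gpu`
# for CUDA builds); LightRAG no longer auto-installs it via pipmaster.
try:
    import faiss  # type: ignore
except ImportError as exc:
    raise ImportError(
        "FaissVectorDBStorage requires faiss: run `pip install faiss-cpu` "
        "or `pip install faiss-gpu` first."
    ) from exc
```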


@@ -108,11 +108,28 @@ EMBEDDING_BINDING_HOST=http://localhost:11434
# AZURE_EMBEDDING_ENDPOINT=your_endpoint
# AZURE_EMBEDDING_API_KEY=your_api_key
###########################
### Data storage selection
###########################
### PostgreSQL
# LIGHTRAG_KV_STORAGE=PGKVStorage
# LIGHTRAG_DOC_STATUS_STORAGE=PGDocStatusStorage
# LIGHTRAG_VECTOR_STORAGE=PGVectorStorage
# LIGHTRAG_GRAPH_STORAGE=PGGraphStorage
### MongoDB
# LIGHTRAG_KV_STORAGE=MongoKVStorage
# LIGHTRAG_DOC_STATUS_STORAGE=MongoDocStatusStorage
# LIGHTRAG_VECTOR_STORAGE=MongoVectorDBStorage
# LIGHTRAG_GRAPH_STORAGE=MongoGraphStorage
### KV Storage
# LIGHTRAG_KV_STORAGE=RedisKVStorage
# LIGHTRAG_DOC_STATUS_STORAGE=RedisDocStatusStorage
### Vector Storage
# LIGHTRAG_VECTOR_STORAGE=FaissVectorDBStorage
# LIGHTRAG_VECTOR_STORAGE=MilvusVectorDBStorage
### Graph Storage
# LIGHTRAG_GRAPH_STORAGE=Neo4JStorage
# LIGHTRAG_GRAPH_STORAGE=MemgraphStorage
### PostgreSQL Configuration
POSTGRES_HOST=localhost
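
For context, a hedged sketch of how these selections surface in code. The keyword names follow LightRAG's storage parameters, but treat the exact kwargs and the fallback defaults as assumptions:

```python
import os

from lightrag import LightRAG

# Each LIGHTRAG_*_STORAGE variable selects an implementation class by name;
# unset variables fall back to the file-based defaults.
rag = LightRAG(
    working_dir="./rag_storage",
    kv_storage=os.getenv("LIGHTRAG_KV_STORAGE", "JsonKVStorage"),
    vector_storage=os.getenv("LIGHTRAG_VECTOR_STORAGE", "NanoVectorDBStorage"),
    graph_storage=os.getenv("LIGHTRAG_GRAPH_STORAGE", "NetworkXStorage"),
    doc_status_storage=os.getenv("LIGHTRAG_DOC_STATUS_STORAGE", "JsonDocStatusStorage"),
    # llm_model_func and embedding_func are omitted for brevity; both are
    # required for a working instance.
)
```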


@@ -4,9 +4,7 @@ import asyncio
from typing import Any, final
import json
import numpy as np
from dataclasses import dataclass
import pipmaster as pm
from lightrag.utils import logger, compute_mdhash_id
from lightrag.base import BaseVectorStorage
@@ -17,11 +15,7 @@ from .shared_storage import (
set_all_update_flags,
)
USE_GPU = os.getenv("FAISS_USE_GPU", "0") == "1"
FAISS_PACKAGE = "faiss-gpu" if USE_GPU else "faiss-cpu"
if not pm.is_installed(FAISS_PACKAGE):
pm.install(FAISS_PACKAGE)
# You must manually install faiss-cpu or faiss-gpu before using FAISS vector db
import faiss # type: ignore
@@ -165,7 +159,7 @@ class FaissVectorDBStorage(BaseVectorStorage):
meta["__vector__"] = embeddings[i].tolist()
self._id_to_meta.update({fid: meta})
logger.info(f"Upserted {len(list_data)} vectors into Faiss index.")
logger.debug(f"Upserted {len(list_data)} vectors into Faiss index.")
return [m["__id__"] for m in list_data]
async def query(
@@ -228,7 +222,7 @@ class FaissVectorDBStorage(BaseVectorStorage):
2. Only one process should update the storage at a time before index_done_callback;
KG-storage-log should be used to avoid data corruption
"""
logger.info(f"Deleting {len(ids)} vectors from {self.namespace}")
logger.debug(f"Deleting {len(ids)} vectors from {self.namespace}")
to_remove = []
for cid in ids:
fid = self._find_faiss_id_by_custom_id(cid)
@@ -330,7 +324,7 @@ class FaissVectorDBStorage(BaseVectorStorage):
and rebuild in-memory structures so we can query.
"""
if not os.path.exists(self._faiss_index_file):
logger.warning("No existing Faiss index file found. Starting fresh.")
logger.warning(f"No existing Faiss index file found for {self.namespace}")
return
try:


@@ -168,6 +168,13 @@ async def _handle_single_entity_extraction(
# Normalize entity name
entity_name = normalize_extracted_info(entity_name, is_entity=True)
# Check if entity name became empty after normalization
if not entity_name or not entity_name.strip():
logger.warning(
f"Entity extraction error: entity name became empty after normalization. Original: '{record_attributes[1]}'"
)
return None
# Clean and validate entity type
entity_type = clean_str(record_attributes[2]).strip('"')
if not entity_type.strip() or entity_type.startswith('("'):
@@ -209,6 +216,20 @@ async def _handle_single_relationship_extraction(
# Normalize source and target entity names
source = normalize_extracted_info(source, is_entity=True)
target = normalize_extracted_info(target, is_entity=True)
# Check if source or target became empty after normalization
if not source or not source.strip():
logger.warning(
f"Relationship extraction error: source entity became empty after normalization. Original: '{record_attributes[1]}'"
)
return None
if not target or not target.strip():
logger.warning(
f"Relationship extraction error: target entity became empty after normalization. Original: '{record_attributes[2]}'"
)
return None
if source == target:
logger.debug(
f"Relationship source and target are the same in: {record_attributes}"
)

pyproject.toml (new file, 93 lines)

@@ -0,0 +1,93 @@
[build-system]
requires = ["setuptools>=64", "wheel"]
build-backend = "setuptools.build_meta"
[project]
name = "lightrag-hku"
dynamic = ["version"]
authors = [
{name = "Zirui Guo"}
]
description = "LightRAG: Simple and Fast Retrieval-Augmented Generation"
readme = "README.md"
license = {text = "MIT"}
requires-python = ">=3.9"
classifiers = [
"Development Status :: 4 - Beta",
"Programming Language :: Python :: 3",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
"Intended Audience :: Developers",
"Topic :: Software Development :: Libraries :: Python Modules",
]
dependencies = [
"aiohttp",
"configparser",
"dotenv",
"future",
"numpy",
"pandas>=2.0.0",
"pipmaster",
"pydantic",
"python-dotenv",
"pyuca",
"setuptools",
"tenacity",
"tiktoken",
"xlsxwriter>=3.1.0",
]
[project.optional-dependencies]
api = [
# Core dependencies
"aiohttp",
"configparser",
"dotenv",
"future",
"numpy",
"openai",
"pandas>=2.0.0",
"pipmaster",
"pydantic",
"python-dotenv",
"pyuca",
"setuptools",
"tenacity",
"tiktoken",
"xlsxwriter>=3.1.0",
# API-specific dependencies
"aiofiles",
"ascii_colors",
"asyncpg",
"distro",
"fastapi",
"httpcore",
"httpx",
"jiter",
"passlib[bcrypt]",
"PyJWT",
"python-jose[cryptography]",
"python-multipart",
"pytz",
"uvicorn",
]
[project.scripts]
lightrag-server = "lightrag.api.lightrag_server:main"
lightrag-gunicorn = "lightrag.api.run_with_gunicorn:main"
[project.urls]
Homepage = "https://github.com/HKUDS/LightRAG"
Documentation = "https://github.com/HKUDS/LightRAG"
Repository = "https://github.com/HKUDS/LightRAG"
"Bug Tracker" = "https://github.com/HKUDS/LightRAG/issues"
[tool.setuptools]
packages = ["lightrag"]
include-package-data = true
[tool.setuptools.dynamic]
version = {attr = "lightrag.__version__"}
[tool.setuptools.package-data]
lightrag = ["api/webui/**/*"]

setup.py (109 lines changed)

@ -1,107 +1,6 @@
import setuptools
from pathlib import Path
# Minimal setup.py for backward compatibility
# Primary configuration is now in pyproject.toml
from setuptools import setup
# Reading the long description from README.md
def read_long_description():
try:
return Path("README.md").read_text(encoding="utf-8")
except FileNotFoundError:
return "A description of LightRAG is currently unavailable."
# Retrieving metadata from __init__.py
def retrieve_metadata():
vars2find = ["__author__", "__version__", "__url__"]
vars2readme = {}
try:
with open("./lightrag/__init__.py") as f:
for line in f.readlines():
for v in vars2find:
if line.startswith(v):
line = (
line.replace(" ", "")
.replace('"', "")
.replace("'", "")
.strip()
)
vars2readme[v] = line.split("=")[1]
except FileNotFoundError:
raise FileNotFoundError("Metadata file './lightrag/__init__.py' not found.")
# Checking if all required variables are found
missing_vars = [v for v in vars2find if v not in vars2readme]
if missing_vars:
raise ValueError(
f"Missing required metadata variables in __init__.py: {missing_vars}"
)
return vars2readme
# Reading dependencies from requirements.txt
def read_requirements(file_path="requirements.txt"):
deps = []
try:
with open(file_path) as f:
deps = [
line.strip() for line in f if line.strip() and not line.startswith("#")
]
except FileNotFoundError:
print(f"Warning: '{file_path}' not found. No dependencies will be installed.")
return deps
def read_api_requirements():
return read_requirements("lightrag/api/requirements.txt")
def read_extra_requirements():
return read_requirements("lightrag/tools/lightrag_visualizer/requirements.txt")
metadata = retrieve_metadata()
long_description = read_long_description()
requirements = read_requirements()
setuptools.setup(
name="lightrag-hku",
url=metadata["__url__"],
version=metadata["__version__"],
author=metadata["__author__"],
description="LightRAG: Simple and Fast Retrieval-Augmented Generation",
long_description=long_description,
long_description_content_type="text/markdown",
packages=setuptools.find_packages(
exclude=("tests*", "docs*")
), # Automatically find packages
classifiers=[
"Development Status :: 4 - Beta",
"Programming Language :: Python :: 3",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
"Intended Audience :: Developers",
"Topic :: Software Development :: Libraries :: Python Modules",
],
python_requires=">=3.9",
install_requires=requirements,
include_package_data=True, # Includes non-code files from MANIFEST.in
project_urls={ # Additional project metadata
"Documentation": metadata.get("__url__", ""),
"Source": metadata.get("__url__", ""),
"Tracker": f"{metadata.get('__url__', '')}/issues"
if metadata.get("__url__")
else "",
},
extras_require={
"api": requirements + read_api_requirements(),
"tools": read_extra_requirements(), # API requirements as optional
},
entry_points={
"console_scripts": [
"lightrag-server=lightrag.api.lightrag_server:main [api]",
"lightrag-gunicorn=lightrag.api.run_with_gunicorn:main [api]",
"lightrag-viewer=lightrag.tools.lightrag_visualizer.graph_visualizer:main [tools]",
],
},
)
setup()
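
With all metadata now coming from pyproject.toml, a quick way to confirm an install picked it up is the stdlib `importlib.metadata`; the package name comes from the `[project]` table above:

```python
from importlib.metadata import metadata, version

print(version("lightrag-hku"))              # resolved from lightrag.__version__
print(metadata("lightrag-hku")["Summary"])  # description string from pyproject.toml
```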