feat: remove unused parameter from query methods across multiple implementations

2025-08-20 15:59:05 +08:00 · 2025-08-20 15:59:05 +08:00 · 874ddda605
commit 874ddda605
parent 60564cf453
9 changed files with 29 additions and 84 deletions
--- a/lightrag/base.py
+++ b/lightrag/base.py
@ -219,7 +219,7 @@ class BaseVectorStorage(StorageNameSpace, ABC):
    @abstractmethod
    async def query(
-        self, query: str, top_k: int, ids: list[str] | None = None
+        self, query: str, top_k: int
    ) -> list[dict[str, Any]]:
        """Query the vector storage and retrieve top_k results."""
--- a/lightrag/kg/deprecated/chroma_impl.py
+++ b/lightrag/kg/deprecated/chroma_impl.py
@ -165,7 +165,7 @@ class ChromaVectorDBStorage(BaseVectorStorage):
            raise
    async def query(
-        self, query: str, top_k: int, ids: list[str] | None = None
+        self, query: str, top_k: int
    ) -> list[dict[str, Any]]:
        try:
            embedding = await self.embedding_func(
--- a/lightrag/kg/faiss_impl.py
+++ b/lightrag/kg/faiss_impl.py
@ -180,7 +180,7 @@ class FaissVectorDBStorage(BaseVectorStorage):
        return [m["__id__"] for m in list_data]
    async def query(
-        self, query: str, top_k: int, ids: list[str] | None = None
+        self, query: str, top_k: int
    ) -> list[dict[str, Any]]:
        """
        Search by a textual query; returns top_k results with their metadata + similarity distance.
--- a/lightrag/kg/milvus_impl.py
+++ b/lightrag/kg/milvus_impl.py
@ -810,7 +810,7 @@ class MilvusVectorDBStorage(BaseVectorStorage):
        return results
    async def query(
-        self, query: str, top_k: int, ids: list[str] | None = None
+        self, query: str, top_k: int
    ) -> list[dict[str, Any]]:
        # Ensure collection is loaded before querying
        self._ensure_collection_loaded()
--- a/lightrag/kg/mongo_impl.py
+++ b/lightrag/kg/mongo_impl.py
@ -1771,7 +1771,7 @@ class MongoVectorDBStorage(BaseVectorStorage):
        return list_data
    async def query(
-        self, query: str, top_k: int, ids: list[str] | None = None
+        self, query: str, top_k: int
    ) -> list[dict[str, Any]]:
        """Queries the vector database using Atlas Vector Search."""
        # Generate the embedding
--- a/lightrag/kg/nano_vector_db_impl.py
+++ b/lightrag/kg/nano_vector_db_impl.py
@ -137,7 +137,7 @@ class NanoVectorDBStorage(BaseVectorStorage):
            )
    async def query(
-        self, query: str, top_k: int, ids: list[str] | None = None
+        self, query: str, top_k: int
    ) -> list[dict[str, Any]]:
        # Execute embedding outside of the lock to improve concurrency
        embedding = await self.embedding_func(
--- a/lightrag/kg/postgres_impl.py
+++ b/lightrag/kg/postgres_impl.py
@ -2005,7 +2005,7 @@ class PGVectorStorage(BaseVectorStorage):
    #################### query method ###############
    async def query(
-        self, query: str, top_k: int, ids: list[str] | None = None
+        self, query: str, top_k: int
    ) -> list[dict[str, Any]]:
        embeddings = await self.embedding_func(
            [query], _priority=5
@ -2016,7 +2016,6 @@ class PGVectorStorage(BaseVectorStorage):
        sql = SQL_TEMPLATES[self.namespace].format(embedding_string=embedding_string)
        params = {
            "workspace": self.workspace,
-            "doc_ids": ids,
            "closer_than_threshold": 1 - self.cosine_better_than_threshold,
            "top_k": top_k,
        }
@ -4578,85 +4577,31 @@ SQL_TEMPLATES = {
                      update_time = EXCLUDED.update_time
                     """,
    "relationships": """
-                     WITH relevant_chunks AS (SELECT id as chunk_id
+                SELECT r.source_id as src_id, r.target_id as tgt_id,
-                                              FROM LIGHTRAG_VDB_CHUNKS
+                       EXTRACT(EPOCH FROM r.create_time)::BIGINT as created_at
-                                              WHERE $2
+                FROM LIGHTRAG_VDB_RELATION r
-                         :: varchar [] IS NULL OR full_doc_id = ANY ($2:: varchar [])
+                WHERE r.workspace = $1
-                         )
+                  AND r.content_vector <=> '[{embedding_string}]'::vector < $2
-                        , rc AS (
+                ORDER BY r.content_vector <=> '[{embedding_string}]'::vector
-                     SELECT array_agg(chunk_id) AS chunk_arr
+                LIMIT $3
-                     FROM relevant_chunks
-                         ), cand AS (
-                     SELECT
-                         r.id, r.source_id AS src_id, r.target_id AS tgt_id, r.chunk_ids, r.create_time, r.content_vector <=> '[{embedding_string}]'::vector AS dist
-                     FROM LIGHTRAG_VDB_RELATION r
-                     WHERE r.workspace = $1
-                     ORDER BY r.content_vector <=> '[{embedding_string}]'::vector
-                         LIMIT ($4 * 50)
-                         )
-                     SELECT c.src_id,
-                            c.tgt_id,
-                            EXTRACT(EPOCH FROM c.create_time) ::BIGINT AS created_at
-                     FROM cand c
-                              JOIN rc ON TRUE
-                     WHERE c.dist < $3
-                       AND c.chunk_ids && (rc.chunk_arr::varchar[])
-                     ORDER BY c.dist, c.id 
-                         LIMIT $4;
                     """,
    "entities": """
-                WITH relevant_chunks AS (SELECT id as chunk_id
+                SELECT e.entity_name,
-                                         FROM LIGHTRAG_VDB_CHUNKS
+                       EXTRACT(EPOCH FROM e.create_time)::BIGINT as created_at
-                                         WHERE $2
-                    :: varchar [] IS NULL OR full_doc_id = ANY ($2:: varchar [])
-                    )
-                   , rc AS (
-                SELECT array_agg(chunk_id) AS chunk_arr
-                FROM relevant_chunks
-                    ), cand AS (
-                SELECT
-                    e.id, e.entity_name, e.chunk_ids, e.create_time, e.content_vector <=> '[{embedding_string}]'::vector AS dist
                FROM LIGHTRAG_VDB_ENTITY e
                WHERE e.workspace = $1
+                  AND e.content_vector <=> '[{embedding_string}]'::vector < $2
                ORDER BY e.content_vector <=> '[{embedding_string}]'::vector
-                    LIMIT ($4 * 50)
+                LIMIT $3
-                    )
-                SELECT c.entity_name,
-                       EXTRACT(EPOCH FROM c.create_time) ::BIGINT AS created_at
-                FROM cand c
-                         JOIN rc ON TRUE
-                WHERE c.dist < $3
-                  AND c.chunk_ids && (rc.chunk_arr::varchar[])
-                ORDER BY c.dist, c.id 
-                    LIMIT $4;
                """,
    "chunks": """
-              WITH relevant_chunks AS (SELECT id as chunk_id
+                SELECT id, content, file_path,
-                                       FROM LIGHTRAG_VDB_CHUNKS
+                       EXTRACT(EPOCH FROM create_time)::BIGINT as created_at
-                                       WHERE $2
+                FROM LIGHTRAG_VDB_CHUNKS
-                  :: varchar [] IS NULL OR full_doc_id = ANY ($2:: varchar [])
+                WHERE workspace = $1
-                  )
+                  AND content_vector <=> '[{embedding_string}]'::vector < $2
-                 , rc AS (
+                ORDER BY content_vector <=> '[{embedding_string}]'::vector
-              SELECT array_agg(chunk_id) AS chunk_arr
+                LIMIT $3
-              FROM relevant_chunks
-                  ), cand AS (
-              SELECT
-                  id, content, file_path, create_time, content_vector <=> '[{embedding_string}]'::vector AS dist
-              FROM LIGHTRAG_VDB_CHUNKS
-              WHERE workspace = $1
-              ORDER BY content_vector <=> '[{embedding_string}]'::vector
-                  LIMIT ($4 * 50)
-                  )
-              SELECT c.id,
-                     c.content,
-                     c.file_path,
-                     EXTRACT(EPOCH FROM c.create_time) ::BIGINT AS created_at
-              FROM cand c
-                       JOIN rc ON TRUE
-              WHERE c.dist < $3
-                AND c.id = ANY (rc.chunk_arr)
-              ORDER BY c.dist, c.id
-                  LIMIT $4;
              """,
    # DROP tables
    "drop_specifiy_table_workspace": """
--- a/lightrag/kg/qdrant_impl.py
+++ b/lightrag/kg/qdrant_impl.py
@ -200,7 +200,7 @@ class QdrantVectorDBStorage(BaseVectorStorage):
        return results
    async def query(
-        self, query: str, top_k: int, ids: list[str] | None = None
+        self, query: str, top_k: int
    ) -> list[dict[str, Any]]:
        embedding = await self.embedding_func(
            [query], _priority=5
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@ -2055,7 +2055,7 @@ async def _get_vector_context(
        # Use chunk_top_k if specified, otherwise fall back to top_k
        search_top_k = query_param.chunk_top_k or query_param.top_k
-        results = await chunks_vdb.query(query, top_k=search_top_k, ids=query_param.ids)
+        results = await chunks_vdb.query(query, top_k=search_top_k)
        if not results:
            return []
@ -2599,7 +2599,7 @@ async def _get_node_data(
    )
    results = await entities_vdb.query(
-        query, top_k=query_param.top_k, ids=query_param.ids
+        query, top_k=query_param.top_k
    )
    if not len(results):
@ -2875,7 +2875,7 @@ async def _get_edge_data(
    )
    results = await relationships_vdb.query(
-        keywords, top_k=query_param.top_k, ids=query_param.ids
+        keywords, top_k=query_param.top_k
    )
    if not len(results):