fix: rename PostgreSQL options env variable and make the statement LRU cache size an optional env variable

(cherry picked from commit 22a7b482c5)
kevinnkansah 2025-10-06 11:56:09 +02:00 committed by Raphaël MANSUY
parent 4e93c9c21d
commit 8f5af8199b
2 changed files with 50 additions and 239 deletions

View file

@ -351,10 +351,13 @@ POSTGRES_IVFFLAT_LISTS=100
# POSTGRES_SSL_ROOT_CERT=/path/to/ca-cert.pem
# POSTGRES_SSL_CRL=/path/to/crl.pem
### PostgreSQL Server Options (for Supabase Supavisor)
### PostgreSQL Server Settings (for Supabase Supavisor)
# Use this to pass extra options to the PostgreSQL connection string.
# For Supabase, you might need to set it like this:
# POSTGRES_SERVER_OPTIONS="options=reference%3D[project-ref]"
# POSTGRES_SERVER_SETTINGS="options=reference%3D[project-ref]"
# Default is 100; set to 0 to disable
# POSTGRES_STATEMENT_CACHE_SIZE=100
### Neo4j Configuration
NEO4J_URI=neo4j+s://xxxxxxxx.databases.neo4j.io
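The %3D in the value above is simply a URL-encoded "=": Supavisor routes the connection via a libpq "options" parameter of the form reference=<project-ref>, and the project reference is left as a placeholder here. A minimal sketch of producing the encoded value (the project reference below is a placeholder, not a real one):

from urllib.parse import quote

# Hedged illustration: encode the inner "=" of the Supavisor routing option.
# "your-project-ref" stands in for a real Supabase project reference.
encoded = quote("reference=your-project-ref", safe="")
print(f"options={encoded}")  # -> options=reference%3Dyour-project-ref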

View file

@ -10,7 +10,6 @@ import numpy as np
import configparser
import ssl
import itertools
import hashlib
from lightrag.types import KnowledgeGraph, KnowledgeGraphNode, KnowledgeGraphEdge
@ -33,7 +32,6 @@ from ..namespace import NameSpace, is_namespace
from ..utils import logger
from ..constants import GRAPH_FIELD_SEP
from ..kg.shared_storage import get_data_init_lock, get_graph_db_lock, get_storage_lock
from ..utils_context import get_current_tenant_id
import pipmaster as pm
@ -78,6 +76,7 @@ class PostgreSQLDB:
# Server settings
self.server_settings = config.get("server_settings")
self.statement_cache_size = int(config.get("statement_cache_size"))
if self.user is None or self.password is None or self.database is None:
raise ValueError("Missing database user, password, or database")
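As wired in the ClientManager hunk further down, statement_cache_size falls back to None when neither POSTGRES_STATEMENT_CACHE_SIZE nor a config-file entry is set, and int(None) raises a TypeError. A minimal defensive parse, assuming asyncpg's default of 100 when the variable is absent (the helper name is illustrative, not part of the commit):

import os

def parse_statement_cache_size(raw: str | None, default: int = 100) -> int:
    # Fall back to asyncpg's default when the variable is unset or empty,
    # so int() never receives None.
    if raw is None or str(raw).strip() == "":
        return default
    return int(raw)

cache_size = parse_statement_cache_size(os.environ.get("POSTGRES_STATEMENT_CACHE_SIZE"))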
@ -163,8 +162,13 @@ class PostgreSQLDB:
"port": self.port,
"min_size": 1,
"max_size": self.max,
"statement_cache_size": self.statement_cache_size,
}
logger.info(
f"PostgreSQL, statement LRU cache size set as: {self.statement_cache_size}"
)
# Add SSL configuration if provided
ssl_context = self._create_ssl_context()
if ssl_context is not None:
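statement_cache_size is passed straight through to asyncpg: its default is 100, and 0 disables the prepared-statement LRU cache, which is the usual setting behind a transaction-mode pooler such as Supabase Supavisor or PgBouncer, where prepared statements do not survive between pooled sessions. A minimal sketch of the same knob in isolation (credentials are placeholders):

import asyncpg

async def make_pool() -> asyncpg.Pool:
    # Hedged sketch, not the project's pool setup: disable the statement cache
    # when connecting through a transaction-mode pooler.
    return await asyncpg.create_pool(
        user="postgres",
        password="secret",
        database="postgres",
        host="localhost",
        port=5432,
        min_size=1,
        max_size=10,
        statement_cache_size=0,
    )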
@ -237,40 +241,23 @@ class PostgreSQLDB:
"""Set the Apache AGE environment and creates a graph if it does not exist.
This method:
- Loads the AGE extension into the current session (required for Cypher functions).
- Sets the PostgreSQL `search_path` to include `ag_catalog`, ensuring that Apache AGE functions can be used without specifying the schema.
- Attempts to create a new graph with the provided `graph_name` if it does not already exist.
- Silently ignores errors related to the graph already existing.
"""
try:
# Load AGE extension - required for Cypher functions to work
await connection.execute("LOAD 'age'") # type: ignore
await connection.execute( # type: ignore
'SET search_path = ag_catalog, "$user", public'
)
# Check if graph exists first to avoid error logs
exists = await connection.fetchval(
"SELECT count(*) FROM ag_catalog.ag_graph WHERE name = $1", graph_name
await connection.execute( # type: ignore
f"select create_graph('{graph_name}')"
)
if exists == 0:
await connection.execute( # type: ignore
f"select create_graph('{graph_name}')"
)
except (
asyncpg.exceptions.InvalidSchemaNameError,
asyncpg.exceptions.UniqueViolationError,
asyncpg.exceptions.DuplicateObjectError, # Graph already exists
):
pass
except Exception as e:
# Handle "already exists" error message for AGE graphs
if "already exists" in str(e):
pass
else:
raise
async def _migrate_llm_cache_schema(self):
"""Migrate LLM cache schema: add new columns and remove deprecated mode field"""
@ -1089,7 +1076,9 @@ class PostgreSQLDB:
try:
# Create index for id column
index_name = f"idx_{table_name.lower()}_id"
create_index_sql = f"CREATE INDEX {index_name} ON {table_name}(id)"
create_index_sql = (
f"CREATE INDEX {index_name} ON {table_name}(id)"
)
await self.execute(create_index_sql)
logger.info(f"Created index {index_name} on table {table_name}")
@ -1145,11 +1134,6 @@ class PostgreSQLDB:
"sql": "CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_lightrag_doc_status_workspace_file_path ON LIGHTRAG_DOC_STATUS (workspace, file_path)",
"description": "Index for workspace + file_path sorting",
},
{
"name": "idx_lightrag_doc_status_workspace_external_id",
"sql": "CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_lightrag_doc_status_workspace_external_id ON LIGHTRAG_DOC_STATUS (workspace, (metadata->>'external_id')) WHERE metadata->>'external_id' IS NOT NULL",
"description": "Index for workspace + external_id for idempotency lookups",
},
]
for index in indexes:
@ -1265,11 +1249,6 @@ class PostgreSQLDB:
graph_name: str | None = None,
) -> dict[str, Any] | None | list[dict[str, Any]]:
async with self.pool.acquire() as connection: # type: ignore
# Set tenant context if available
tenant_id = get_current_tenant_id()
if tenant_id:
await connection.execute(f"SET app.current_tenant = '{tenant_id}'")
if with_age and graph_name:
await self.configure_age(connection, graph_name) # type: ignore
elif with_age and not graph_name:
@ -1277,15 +1256,24 @@ class PostgreSQLDB:
try:
if params:
if multirows:
return await connection.fetch(sql, *params)
else:
return await connection.fetchrow(sql, *params)
rows = await connection.fetch(sql, *params)
else:
if multirows:
return await connection.fetch(sql)
rows = await connection.fetch(sql)
if multirows:
if rows:
columns = [col for col in rows[0].keys()]
data = [dict(zip(columns, row)) for row in rows]
else:
return await connection.fetchrow(sql)
data = []
else:
if rows:
columns = rows[0].keys()
data = dict(zip(columns, rows[0]))
else:
data = None
return data
except Exception as e:
logger.error(f"PostgreSQL database, error:{e}")
raise
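The rewritten query() now returns plain dicts (or a list of them) instead of asyncpg.Record objects, which is why the explicit dict(row) conversions disappear from the KV-storage hunks below. asyncpg.Record already supports key-based access, so zipping its keys with its values is equivalent to a direct dict() call; a minimal sketch of that equivalence (illustrative only):

import asyncpg

def record_to_dict(row: asyncpg.Record) -> dict:
    # Record exposes keys() and item access, so dict(row) yields the same
    # mapping as zipping its keys with its values.
    return dict(row)

def records_to_dicts(rows: list[asyncpg.Record]) -> list[dict]:
    return [dict(row) for row in rows]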
@ -1301,11 +1289,6 @@ class PostgreSQLDB:
):
try:
async with self.pool.acquire() as connection: # type: ignore
# Set tenant context if available
tenant_id = get_current_tenant_id()
if tenant_id:
await connection.execute(f"SET app.current_tenant = '{tenant_id}'")
if with_age and graph_name:
await self.configure_age(connection, graph_name)
elif with_age and not graph_name:
@ -1320,7 +1303,6 @@ class PostgreSQLDB:
asyncpg.exceptions.DuplicateTableError,
asyncpg.exceptions.DuplicateObjectError, # Catch "already exists" error
asyncpg.exceptions.InvalidSchemaNameError, # Also catch for AGE extension "already exists"
# asyncpg.exceptions.UndefinedTableError, # Catch "relation does not exist" for index creation
) as e:
if ignore_if_exists:
# If the flag is set, just ignore these specific errors
@ -1415,9 +1397,13 @@ class ClientManager:
),
# Server settings for Supabase
"server_settings": os.environ.get(
"POSTGRES_SERVER_OPTIONS",
"POSTGRES_SERVER_SETTINGS",
config.get("postgres", "server_options", fallback=None),
),
"statement_cache_size": os.environ.get(
"POSTGRES_STATEMENT_CACHE_SIZE",
config.get("postgres", "statement_cache_size", fallback=None),
),
}
@classmethod
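Each value is resolved environment-first, then from the [postgres] section of the project's config file; note that the ini fallback for the renamed variable still reads the server_options key in this hunk. A minimal sketch of the same precedence (the config file name is an assumption):

import configparser
import os

config = configparser.ConfigParser()
config.read("config.ini")  # assumed file name

server_settings = os.environ.get(
    "POSTGRES_SERVER_SETTINGS",
    config.get("postgres", "server_options", fallback=None),
)
statement_cache_size = os.environ.get(
    "POSTGRES_STATEMENT_CACHE_SIZE",
    config.get("postgres", "statement_cache_size", fallback=None),
)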
@ -1471,9 +1457,6 @@ class PGKVStorage(BaseKVStorage):
# Use "default" for compatibility (lowest priority)
self.workspace = "default"
# Apply multi-tenant isolation
self.workspace = self._get_composite_workspace()
async def finalize(self):
async with get_storage_lock():
if self.db is not None:
@ -1524,7 +1507,6 @@ class PGKVStorage(BaseKVStorage):
if is_namespace(self.namespace, NameSpace.KV_STORE_TEXT_CHUNKS):
processed_results = {}
for row in results:
row = dict(row)
llm_cache_list = row.get("llm_cache_list", [])
if isinstance(llm_cache_list, str):
try:
@ -1545,7 +1527,6 @@ class PGKVStorage(BaseKVStorage):
if is_namespace(self.namespace, NameSpace.KV_STORE_FULL_ENTITIES):
processed_results = {}
for row in results:
row = dict(row)
entity_names = row.get("entity_names", [])
if isinstance(entity_names, str):
try:
@ -1566,7 +1547,6 @@ class PGKVStorage(BaseKVStorage):
if is_namespace(self.namespace, NameSpace.KV_STORE_FULL_RELATIONS):
processed_results = {}
for row in results:
row = dict(row)
relation_pairs = row.get("relation_pairs", [])
if isinstance(relation_pairs, str):
try:
@ -1596,9 +1576,6 @@ class PGKVStorage(BaseKVStorage):
sql = SQL_TEMPLATES["get_by_id_" + self.namespace]
params = {"workspace": self.workspace, "id": id}
response = await self.db.query(sql, list(params.values()))
if response:
response = dict(response)
if response and is_namespace(self.namespace, NameSpace.KV_STORE_TEXT_CHUNKS):
# Parse llm_cache_list JSON string back to list
@ -1679,9 +1656,6 @@ class PGKVStorage(BaseKVStorage):
)
params = {"workspace": self.workspace}
results = await self.db.query(sql, list(params.values()), multirows=True)
if results:
results = [dict(r) for r in results]
if results and is_namespace(self.namespace, NameSpace.KV_STORE_TEXT_CHUNKS):
# Parse llm_cache_list JSON string back to list for each result
@ -1862,28 +1836,6 @@ class PGKVStorage(BaseKVStorage):
"update_time": current_time,
}
await self.db.execute(upsert_sql, _data)
elif is_namespace(self.namespace, NameSpace.KV_STORE_TENANTS):
for k, v in data.items():
upsert_sql = SQL_TEMPLATES["upsert_tenants"]
_data = {
"workspace": self.workspace,
"id": k,
"data": json.dumps(v),
"create_time": v.get("create_time"),
"update_time": v.get("update_time"),
}
await self.db.execute(upsert_sql, _data)
elif is_namespace(self.namespace, NameSpace.KV_STORE_KNOWLEDGE_BASES):
for k, v in data.items():
upsert_sql = SQL_TEMPLATES["upsert_knowledge_bases"]
_data = {
"workspace": self.workspace,
"id": k,
"data": json.dumps(v),
"create_time": v.get("create_time"),
"update_time": v.get("update_time"),
}
await self.db.execute(upsert_sql, _data)
async def index_done_callback(self) -> None:
# PG handles persistence automatically
@ -1971,9 +1923,6 @@ class PGVectorStorage(BaseVectorStorage):
# Use "default" for compatibility (lowest priority)
self.workspace = "default"
# Apply multi-tenant isolation
self.workspace = self._get_composite_workspace()
async def finalize(self):
async with get_storage_lock():
if self.db is not None:
@ -2521,62 +2470,6 @@ class PGDocStatusStorage(DocStatusStorage):
track_id=result[0].get("track_id"),
)
async def get_doc_by_external_id(self, external_id: str) -> Union[dict[str, Any], None]:
"""Get document by external_id for idempotency support.
Uses indexed lookup on metadata->>'external_id' for efficient retrieval.
Args:
external_id: The external unique identifier to search for
Returns:
Union[dict[str, Any], None]: Document data if found, None otherwise
"""
sql = """
SELECT * FROM LIGHTRAG_DOC_STATUS
WHERE workspace=$1 AND metadata->>'external_id' = $2
"""
params = {"workspace": self.workspace, "external_id": external_id}
result = await self.db.query(sql, list(params.values()), True)
if result is None or result == []:
return None
else:
# Parse chunks_list JSON string back to list
chunks_list = result[0].get("chunks_list", [])
if isinstance(chunks_list, str):
try:
chunks_list = json.loads(chunks_list)
except json.JSONDecodeError:
chunks_list = []
# Parse metadata JSON string back to dict
metadata = result[0].get("metadata", {})
if isinstance(metadata, str):
try:
metadata = json.loads(metadata)
except json.JSONDecodeError:
metadata = {}
# Convert datetime objects to ISO format strings with timezone info
created_at = self._format_datetime_with_timezone(result[0]["created_at"])
updated_at = self._format_datetime_with_timezone(result[0]["updated_at"])
return dict(
id=result[0]["id"],
content_length=result[0]["content_length"],
content_summary=result[0]["content_summary"],
status=result[0]["status"],
chunks_count=result[0]["chunks_count"],
created_at=created_at,
updated_at=updated_at,
file_path=result[0]["file_path"],
chunks_list=chunks_list,
metadata=metadata,
error_msg=result[0].get("error_msg"),
track_id=result[0].get("track_id"),
)
async def get_status_counts(self) -> dict[str, int]:
"""Get counts of documents in each status"""
sql = """SELECT status as "status", COUNT(1) as "count"
@ -2700,7 +2593,7 @@ class PGDocStatusStorage(DocStatusStorage):
async def get_docs_paginated(
self,
status_filter: DocStatus | None,
status_filter: DocStatus | None = None,
page: int = 1,
page_size: int = 50,
sort_field: str = "updated_at",
@ -3013,19 +2906,7 @@ class PGGraphStorage(BaseGraphStorage):
# Ensure names comply with PostgreSQL identifier specifications
safe_workspace = re.sub(r"[^a-zA-Z0-9_]", "_", workspace.strip())
safe_namespace = re.sub(r"[^a-zA-Z0-9_]", "_", namespace)
graph_name = f"{safe_workspace}_{safe_namespace}"
# Ensure graph name starts with a letter (AGE requirement)
if not graph_name[0].isalpha():
graph_name = f"g_{graph_name}"
# PostgreSQL identifier limit is 63 bytes
if len(graph_name) > 63:
# Use MD5 hash to ensure uniqueness and fit within limit
hash_object = hashlib.md5(graph_name.encode())
graph_name = f"g_{hash_object.hexdigest()}"
return graph_name
return f"{safe_workspace}_{safe_namespace}"
else:
# When the workspace is "default", use the namespace directly (for backward compatibility with legacy implementations)
return re.sub(r"[^a-zA-Z0-9_]", "_", namespace)
@ -3108,35 +2989,6 @@ class PGGraphStorage(BaseGraphStorage):
graph_name=self.graph_name,
)
# Verify that essential labels exist by checking the ag_label catalog
# This helps catch cases where label creation silently failed
try:
async with self.db.pool.acquire() as connection:
await connection.execute("LOAD 'age'") # Required for AGE functions
await connection.execute('SET search_path = ag_catalog, "$user", public')
# Check if 'base' label exists for this graph
result = await connection.fetchrow(
"""
SELECT l.name
FROM ag_catalog.ag_label l
JOIN ag_catalog.ag_graph g ON l.graph = g.graphid
WHERE l.name = 'base' AND g.name = $1
""",
self.graph_name
)
if result is None:
logger.warning(
f"[{self.workspace}] 'base' vlabel not found for graph '{self.graph_name}', attempting to create..."
)
# Retry creating the vlabel
await connection.execute(
f"SELECT create_vlabel('{self.graph_name}', 'base')"
)
logger.info(f"[{self.workspace}] Successfully created 'base' vlabel for graph '{self.graph_name}'")
except Exception as e:
if "already exists" not in str(e):
logger.error(f"[{self.workspace}] Failed to verify/create 'base' vlabel: {e}")
async def finalize(self):
async with get_graph_db_lock():
if self.db is not None:
@ -3303,7 +3155,6 @@ class PGGraphStorage(BaseGraphStorage):
Returns:
list[dict[str, Any]]: a list of dictionaries containing the result set
"""
logger.info(f"[{self.workspace}] Executing query: {query}")
try:
if readonly:
data = await self.db.query(
@ -3494,6 +3345,7 @@ class PGGraphStorage(BaseGraphStorage):
raise
@retry(
stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=1, min=4, max=10),
retry=retry_if_exception_type((PGGraphQueryException,)),
)
@ -3749,10 +3601,8 @@ class PGGraphStorage(BaseGraphStorage):
node_id = row["node_id"]
if not node_id:
continue
out_degree = out_degrees.get(node_id, 0)
in_degree = in_degrees.get(node_id, 0)
out_degrees[node_id] = out_degree + int(row.get("out_degree", 0) or 0)
in_degrees[node_id] = in_degree + int(row.get("in_degree", 0) or 0)
out_degrees[node_id] = int(row.get("out_degree", 0) or 0)
in_degrees[node_id] = int(row.get("in_degree", 0) or 0)
degrees_dict = {}
for node_id in node_ids:
@ -3856,13 +3706,15 @@ class PGGraphStorage(BaseGraphStorage):
SELECT * FROM cypher({dollar_quote(self.graph_name)}::name,
{dollar_quote(forward_cypher)}::cstring,
$1::agtype)
AS (source text, target text, edge_properties agtype)"""
AS (source text, target text, edge_properties agtype)
"""
sql_bwd = f"""
SELECT * FROM cypher({dollar_quote(self.graph_name)}::name,
{dollar_quote(backward_cypher)}::cstring,
$1::agtype)
AS (source text, target text, edge_properties agtype)"""
AS (source text, target text, edge_properties agtype)
"""
pg_params = {"params": json.dumps({"pairs": pairs}, ensure_ascii=False)}
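dollar_quote wraps the Cypher text in a PostgreSQL dollar-quoted literal so embedded quotes need no escaping before it is cast to cstring for the cypher() call. A hypothetical minimal version, not the project's implementation:

def dollar_quote(text: str, tag: str = "dq") -> str:
    # Hypothetical sketch: a production version should choose a tag that does
    # not occur inside the quoted text.
    return f"${tag}${text}${tag}$"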
@ -4034,8 +3886,7 @@ class PGGraphStorage(BaseGraphStorage):
f"[{self.workspace}] Failed to parse node string in batch: {node_dict}"
)
# Add node id (entity_id) to the dictionary for easier access
node_dict["id"] = node_dict.get("entity_id")
node_dict["id"] = node_dict["entity_id"]
nodes.append(node_dict)
return nodes
@ -4069,7 +3920,6 @@ class PGGraphStorage(BaseGraphStorage):
logger.warning(
f"[{self.workspace}] Failed to parse edge string in batch: {edge_agtype}"
)
edge_agtype = {}
source_agtype = item["source"]["properties"]
# Process string result, parse it to JSON dictionary
@ -4374,10 +4224,6 @@ class PGGraphStorage(BaseGraphStorage):
$$) AS (a AGTYPE, r AGTYPE, b AGTYPE)"""
results = await self._query(query)
logger.info(f"[{self.workspace}] Query results count: {len(results)}")
if results:
logger.info(f"[{self.workspace}] First result sample: {results[0]}")
# Process query results, deduplicate nodes and edges
nodes_dict = {}
edges_dict = {}
@ -4519,13 +4365,13 @@ class PGGraphStorage(BaseGraphStorage):
GROUP BY node_id
)
SELECT
(ag_catalog.agtype_access_operator(VARIADIC ARRAY[properties, '"entity_id"'::agtype]))::text AS label
(ag_catalog.agtype_access_operator(VARIADIC ARRAY[v.properties, '"entity_id"'::agtype]))::text AS label
FROM
node_degrees d
JOIN
{self.graph_name}._ag_label_vertex v ON d.node_id = v.id
WHERE
ag_catalog.agtype_access_operator(VARIADIC ARRAY[properties, '"entity_id"'::agtype]) IS NOT NULL
ag_catalog.agtype_access_operator(VARIADIC ARRAY[v.properties, '"entity_id"'::agtype]) IS NOT NULL
ORDER BY
d.degree DESC,
label ASC
@ -4615,7 +4461,7 @@ class PGGraphStorage(BaseGraphStorage):
drop_query = f"""SELECT * FROM cypher('{self.graph_name}', $$
MATCH (n)
DETACH DELETE n
$$) AS (n agtype)"""
$$) AS (result agtype)"""
await self._query(drop_query, readonly=False)
return {
@ -4639,8 +4485,6 @@ NAMESPACE_TABLE_MAP = {
NameSpace.VECTOR_STORE_ENTITIES: "LIGHTRAG_VDB_ENTITY",
NameSpace.VECTOR_STORE_RELATIONSHIPS: "LIGHTRAG_VDB_RELATION",
NameSpace.DOC_STATUS: "LIGHTRAG_DOC_STATUS",
NameSpace.KV_STORE_TENANTS: "LIGHTRAG_TENANTS",
NameSpace.KV_STORE_KNOWLEDGE_BASES: "LIGHTRAG_KNOWLEDGE_BASES",
}
@ -4651,26 +4495,6 @@ def namespace_to_table_name(namespace: str) -> str:
TABLES = {
"LIGHTRAG_TENANTS": {
"ddl": """CREATE TABLE LIGHTRAG_TENANTS (
id VARCHAR(255),
workspace VARCHAR(255),
data JSONB,
create_time TIMESTAMP(0) DEFAULT CURRENT_TIMESTAMP,
update_time TIMESTAMP(0) DEFAULT CURRENT_TIMESTAMP,
CONSTRAINT LIGHTRAG_TENANTS_PK PRIMARY KEY (workspace, id)
)"""
},
"LIGHTRAG_KNOWLEDGE_BASES": {
"ddl": """CREATE TABLE LIGHTRAG_KNOWLEDGE_BASES (
id VARCHAR(255),
workspace VARCHAR(255),
data JSONB,
create_time TIMESTAMP(0) DEFAULT CURRENT_TIMESTAMP,
update_time TIMESTAMP(0) DEFAULT CURRENT_TIMESTAMP,
CONSTRAINT LIGHTRAG_KNOWLEDGE_BASES_PK PRIMARY KEY (workspace, id)
)"""
},
"LIGHTRAG_DOC_FULL": {
"ddl": """CREATE TABLE LIGHTRAG_DOC_FULL (
id VARCHAR(255),
@ -4853,10 +4677,6 @@ SQL_TEMPLATES = {
EXTRACT(EPOCH FROM update_time)::BIGINT as update_time
FROM LIGHTRAG_FULL_RELATIONS WHERE workspace=$1 AND id IN ({ids})
""",
"get_by_id_tenants": """SELECT data FROM LIGHTRAG_TENANTS WHERE workspace=$1 AND id=$2""",
"get_by_id_knowledge_bases": """SELECT data FROM LIGHTRAG_KNOWLEDGE_BASES WHERE workspace=$1 AND id=$2""",
"get_by_ids_tenants": """SELECT data FROM LIGHTRAG_TENANTS WHERE workspace=$1 AND id IN ({ids})""",
"get_by_ids_knowledge_bases": """SELECT data FROM LIGHTRAG_KNOWLEDGE_BASES WHERE workspace=$1 AND id IN ({ids})""",
"filter_keys": "SELECT id FROM {table_name} WHERE workspace=$1 AND id IN ({ids})",
"upsert_doc_full": """INSERT INTO LIGHTRAG_DOC_FULL (id, content, doc_name, workspace)
VALUES ($1, $2, $3, $4)
@ -4904,18 +4724,6 @@ SQL_TEMPLATES = {
count=EXCLUDED.count,
update_time = EXCLUDED.update_time
""",
"upsert_tenants": """INSERT INTO LIGHTRAG_TENANTS (workspace, id, data, create_time, update_time)
VALUES ($1, $2, $3, $4, $5)
ON CONFLICT (workspace,id) DO UPDATE
SET data=EXCLUDED.data,
update_time = EXCLUDED.update_time
""",
"upsert_knowledge_bases": """INSERT INTO LIGHTRAG_KNOWLEDGE_BASES (workspace, id, data, create_time, update_time)
VALUES ($1, $2, $3, $4, $5)
ON CONFLICT (workspace,id) DO UPDATE
SET data=EXCLUDED.data,
update_time = EXCLUDED.update_time
""",
# SQL for VectorStorage
"upsert_chunk": """INSERT INTO LIGHTRAG_VDB_CHUNKS (workspace, id, tokens,
chunk_order_index, full_doc_id, content, content_vector, file_path,