From 31f4f96944044f18576510f0bbd63daebd66712d Mon Sep 17 00:00:00 2001
From: yangdx
Date: Mon, 25 Aug 2025 12:43:34 +0800
Subject: [PATCH] Exclude conversation history from context length calculation

---
 lightrag/operate.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/lightrag/operate.py b/lightrag/operate.py
index 859d672c..cc06801c 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -2499,15 +2499,15 @@ async def _build_query_context(
     kg_context_tokens = len(tokenizer.encode(kg_context))

     # Calculate actual system prompt overhead dynamically
-    # 1. Calculate conversation history tokens
+    # 1. Conversation history is not included in the context length calculation
     history_context = ""
-    if query_param.conversation_history:
-        history_context = get_conversation_turns(
-            query_param.conversation_history, query_param.history_turns
-        )
-    history_tokens = (
-        len(tokenizer.encode(history_context)) if history_context else 0
-    )
+    # if query_param.conversation_history:
+    #     history_context = get_conversation_turns(
+    #         query_param.conversation_history, query_param.history_turns
+    #     )
+    # history_tokens = (
+    #     len(tokenizer.encode(history_context)) if history_context else 0
+    # )

     # 2. Calculate system prompt template tokens (excluding context_data)
     user_prompt = query_param.user_prompt if query_param.user_prompt else ""
@@ -2542,7 +2542,7 @@
     available_chunk_tokens = max_total_tokens - used_tokens

     logger.debug(
-        f"Token allocation - Total: {max_total_tokens}, History: {history_tokens}, SysPrompt: {sys_prompt_overhead}, KG: {kg_context_tokens}, Buffer: {buffer_tokens}, Available for chunks: {available_chunk_tokens}"
+        f"Token allocation - Total: {max_total_tokens}, SysPrompt: {sys_prompt_overhead}, KG: {kg_context_tokens}, Buffer: {buffer_tokens}, Available for chunks: {available_chunk_tokens}"
     )

     # Apply token truncation to chunks using the dynamic limit
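For reference, a minimal sketch of the token budget arithmetic this patch leaves in place, with conversation history excluded from the calculation. The function name available_chunk_budget, the sys_prompt_template parameter, and the default values below are illustrative assumptions for this sketch, not names or constants from operate.py; only the overall arithmetic mirrors the patched code.

    # Minimal sketch (not the project's actual code) of the token budget after
    # this patch: conversation history no longer counts against the context length.
    # available_chunk_budget, sys_prompt_template, and the defaults are assumed names/values.
    def available_chunk_budget(
        tokenizer,
        kg_context: str,
        sys_prompt_template: str,
        user_prompt: str = "",
        max_total_tokens: int = 32000,
        buffer_tokens: int = 100,
    ) -> int:
        # Tokens consumed by the knowledge-graph context block.
        kg_context_tokens = len(tokenizer.encode(kg_context))
        # System prompt overhead measured without the context_data placeholder filled in.
        sys_prompt_overhead = len(tokenizer.encode(sys_prompt_template + user_prompt))
        # Conversation history is intentionally excluded from used_tokens.
        used_tokens = kg_context_tokens + sys_prompt_overhead + buffer_tokens
        return max(max_total_tokens - used_tokens, 0)

Text chunks are then truncated to fit this remaining budget, which is also why the History term disappears from the debug log line in the second hunk.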