From ec40b17eeac7790960faf0d6f9844f62ea75c659 Mon Sep 17 00:00:00 2001
From: Yasiru Rangana <yasiru@formitize.com>
Date: Wed, 8 Oct 2025 14:36:08 +1100
Subject: [PATCH 1/2] feat: Add token tracking support to openai_embed function

- Add optional token_tracker parameter (default None) to openai_embed()
- Report prompt_tokens and total_tokens from the embeddings response's
  usage field via token_tracker.add_usage()
- Enables monitoring of embedding token usage alongside LLM calls
- Maintains backward compatibility: callers that omit token_tracker are
  unaffected
---
 lightrag/llm/openai.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/lightrag/llm/openai.py b/lightrag/llm/openai.py
index 6f80934b..f7b759ad 100644
--- a/lightrag/llm/openai.py
+++ b/lightrag/llm/openai.py
@@ -579,6 +579,7 @@ async def openai_embed(
     base_url: str | None = None,
     api_key: str | None = None,
     client_configs: dict[str, Any] | None = None,
+    token_tracker: Any | None = None,
 ) -> np.ndarray:
     """Generate embeddings for a list of texts using OpenAI's API.
 
@@ -590,6 +591,7 @@ async def openai_embed(
         client_configs: Additional configuration options for the AsyncOpenAI client.
             These will override any default configurations but will be overridden by
             explicit parameters (api_key, base_url).
+        token_tracker: Optional object whose add_usage() is given the token counts.
 
     Returns:
         A numpy array of embeddings, one per input text.
@@ -608,6 +610,14 @@ async def openai_embed(
         response = await openai_async_client.embeddings.create(
             model=model, input=texts, encoding_format="base64"
         )
+        
+        if token_tracker and hasattr(response, "usage"):
+            token_counts = {
+                "prompt_tokens": getattr(response.usage, "prompt_tokens", 0),
+                "total_tokens": getattr(response.usage, "total_tokens", 0),
+            }
+            token_tracker.add_usage(token_counts)
+        
         return np.array(
             [
                 np.array(dp.embedding, dtype=np.float32)

From ae9f4ae73fc7840a47683e3fd4ae5c4e055c9f7c Mon Sep 17 00:00:00 2001
From: Yasiru Rangana <yasiru@formitize.com>
Date: Thu, 9 Oct 2025 15:01:53 +1100
Subject: [PATCH 2/2] fix: Remove trailing whitespace for pre-commit linting

---
 lightrag/llm/openai.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lightrag/llm/openai.py b/lightrag/llm/openai.py
index f7b759ad..d367abc7 100644
--- a/lightrag/llm/openai.py
+++ b/lightrag/llm/openai.py
@@ -610,14 +610,14 @@ async def openai_embed(
         response = await openai_async_client.embeddings.create(
             model=model, input=texts, encoding_format="base64"
         )
-        
+
         if token_tracker and hasattr(response, "usage"):
             token_counts = {
                 "prompt_tokens": getattr(response.usage, "prompt_tokens", 0),
                 "total_tokens": getattr(response.usage, "total_tokens", 0),
             }
             token_tracker.add_usage(token_counts)
-        
+
         return np.array(
             [
                 np.array(dp.embedding, dtype=np.float32)