From d90698305bb3d1884577749a7408d7376e6149b3 Mon Sep 17 00:00:00 2001
From: Leon Luithlen <leon@topoteretes.com>
Date: Thu, 14 Nov 2024 09:43:10 +0100
Subject: [PATCH] Simplify chunk_by_word

---
 cognee/tasks/chunks/chunk_by_word.py | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

diff --git a/cognee/tasks/chunks/chunk_by_word.py b/cognee/tasks/chunks/chunk_by_word.py
index 120c759e6..a93f9acdb 100644
--- a/cognee/tasks/chunks/chunk_by_word.py
+++ b/cognee/tasks/chunks/chunk_by_word.py
@@ -25,14 +25,6 @@ def chunk_by_word(data: str):
     current_chunk = ""
     i = 0
     
-    # Handle leading whitespace if any
-    while i < len(data) and (re.match(PARAGRAPH_ENDINGS, data[i]) or data[i] == " "):
-        current_chunk += data[i]
-        i += 1
-    if current_chunk:
-        yield (current_chunk, "word")
-        current_chunk = ""
-    
     while i < len(data):
         character = data[i]
             
@@ -53,12 +45,7 @@ def chunk_by_word(data: str):
             i += 1
             continue
         
-        if re.match(SENTENCE_ENDINGS, character):
-            # Check for ellipses
-            if i + 2 < len(data) and data[i:i+3] == "...":
-                current_chunk += ".."
-                i += 2
-                
+        if re.match(SENTENCE_ENDINGS, character):                
             # Look ahead for whitespace
             next_i = i + 1
             while next_i < len(data) and data[next_i] == " ":