From eaf9167fa170113ca6173a518d679ef5536647c4 Mon Sep 17 00:00:00 2001
From: Leon Luithlen <leon@topoteretes.com>
Date: Thu, 14 Nov 2024 14:19:34 +0100
Subject: [PATCH] Change chunk_by_word to collect newlines in prior words

---
 cognee/tasks/chunks/chunk_by_word.py | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/cognee/tasks/chunks/chunk_by_word.py b/cognee/tasks/chunks/chunk_by_word.py
index 45f11f1c2..ab4d8343e 100644
--- a/cognee/tasks/chunks/chunk_by_word.py
+++ b/cognee/tasks/chunks/chunk_by_word.py
@@ -38,23 +38,13 @@ def chunk_by_word(data: str):
     Whitespace is included with the preceding word.
     Outputs can be joined with "" to recreate the original input.
     """
-    last_processed_character = ""
     current_chunk = ""
     i = 0
     
     while i < len(data):
         character = data[i]
             
-        if re.match(PARAGRAPH_ENDINGS, character):
-            if current_chunk:
-                yield (current_chunk, "word")
-                current_chunk = ""
-            yield (character, "paragraph_end" if is_real_paragraph_end(last_processed_character, i, data) else "word")
-            i += 1
-            continue
-            
         current_chunk += character
-        last_processed_character = character
         
         if character == " ":
             yield (current_chunk, "word")