diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 42f12ea51..96bfe6d32 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -10,7 +10,7 @@ repos: - id: check-added-large-files - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.8.3 + rev: v0.9.0 hooks: # Run the linter. - id: ruff diff --git a/cognee/api/v1/cognify/code_graph_pipeline.py b/cognee/api/v1/cognify/code_graph_pipeline.py index 405cb0b40..2d077f39b 100644 --- a/cognee/api/v1/cognify/code_graph_pipeline.py +++ b/cognee/api/v1/cognify/code_graph_pipeline.py @@ -3,7 +3,6 @@ import logging from pathlib import Path from cognee.base_config import get_base_config -from cognee.infrastructure.databases.vector.embeddings import get_embedding_engine from cognee.modules.cognify.config import get_cognify_config from cognee.modules.pipelines import run_tasks from cognee.modules.pipelines.tasks.Task import Task @@ -54,8 +53,6 @@ async def run_code_graph_pipeline(repo_path, include_docs=True): await cognee.prune.prune_system(metadata=True) await create_db_and_tables() - embedding_engine = get_embedding_engine() - cognee_config = get_cognify_config() user = await get_default_user() @@ -63,11 +60,7 @@ async def run_code_graph_pipeline(repo_path, include_docs=True): Task(get_repo_file_dependencies), Task(enrich_dependency_graph), Task(expand_dependency_graph, task_config={"batch_size": 50}), - Task( - get_source_code_chunks, - embedding_model=embedding_engine.model, - task_config={"batch_size": 50}, - ), + Task(get_source_code_chunks, task_config={"batch_size": 50}), Task(summarize_code, task_config={"batch_size": 50}), Task(add_data_points, task_config={"batch_size": 50}), ] @@ -78,7 +71,7 @@ async def run_code_graph_pipeline(repo_path, include_docs=True): Task(ingest_data_with_metadata, dataset_name="repo_docs", user=user), Task(get_data_list_for_user, dataset_name="repo_docs", user=user), Task(classify_documents), - Task(extract_chunks_from_documents), + Task(extract_chunks_from_documents, max_tokens=cognee_config.max_tokens), Task( extract_graph_from_data, graph_model=KnowledgeGraph, task_config={"batch_size": 50} ), diff --git a/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py b/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py index 5490f6b43..3543418fc 100644 --- a/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +++ b/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py @@ -493,7 +493,7 @@ class Neo4jAdapter(GraphDBInterface): query_edges = f""" MATCH (n)-[r]->(m) - WHERE {where_clause} AND {where_clause.replace('n.', 'm.')} + WHERE {where_clause} AND {where_clause.replace("n.", "m.")} RETURN ID(n) AS source, ID(m) AS target, TYPE(r) AS type, properties(r) AS properties """ result_edges = await self.query(query_edges) diff --git a/cognee/infrastructure/llm/prompts/patch_gen_kg_instructions.txt b/cognee/infrastructure/llm/prompts/patch_gen_kg_instructions.txt index ebbb03f75..3117ac9f1 100644 --- a/cognee/infrastructure/llm/prompts/patch_gen_kg_instructions.txt +++ b/cognee/infrastructure/llm/prompts/patch_gen_kg_instructions.txt @@ -1,3 +1,6 @@ -I need you to solve this issue by looking at the provided edges retrieved from a knowledge graph and -generate a single patch file that I can apply directly to this repository using git apply. -Please respond with a single patch file in the following format. \ No newline at end of file +You are a senior software engineer. 
I need you to solve this issue by looking at the provided context and +generate a single patch file that I can apply directly to this repository using git apply. +Additionally, please make sure that the code you provide is syntactically correct and +that the patch targets the relevant files (including their paths, which you can infer from the GitHub issue). Don't change the names of existing +functions or classes, as they may be referenced from other code. +Please respond only with a single patch file in the following format, without any additional context or text. diff --git a/cognee/modules/chunking/TextChunker.py b/cognee/modules/chunking/TextChunker.py index 7bb8a1c1c..78c02b9c9 100644 --- a/cognee/modules/chunking/TextChunker.py +++ b/cognee/modules/chunking/TextChunker.py @@ -1,7 +1,9 @@ -from uuid import uuid5, NAMESPACE_OID +from typing import Optional +from uuid import NAMESPACE_OID, uuid5 + +from cognee.tasks.chunks import chunk_by_paragraph from .models.DocumentChunk import DocumentChunk -from cognee.tasks.chunks import chunk_by_paragraph class TextChunker: @@ -10,23 +12,36 @@ class TextChunker: chunk_index = 0 chunk_size = 0 + token_count = 0 - def __init__(self, document, get_text: callable, chunk_size: int = 1024): + def __init__( + self, document, get_text: callable, max_tokens: Optional[int] = None, chunk_size: int = 1024 + ): self.document = document self.max_chunk_size = chunk_size self.get_text = get_text + self.max_tokens = max_tokens if max_tokens else float("inf") + + def check_word_count_and_token_count(self, word_count_before, token_count_before, chunk_data): + word_count_fits = word_count_before + chunk_data["word_count"] <= self.max_chunk_size + token_count_fits = token_count_before + chunk_data["token_count"] <= self.max_tokens + return word_count_fits and token_count_fits def read(self): paragraph_chunks = [] for content_text in self.get_text(): for chunk_data in chunk_by_paragraph( content_text, + self.max_tokens, self.max_chunk_size, batch_paragraphs=True, ): - if self.chunk_size + chunk_data["word_count"] <= self.max_chunk_size: + if self.check_word_count_and_token_count( + self.chunk_size, self.token_count, chunk_data + ): paragraph_chunks.append(chunk_data) self.chunk_size += chunk_data["word_count"] + self.token_count += chunk_data["token_count"] else: if len(paragraph_chunks) == 0: yield DocumentChunk( @@ -66,6 +81,7 @@ class TextChunker: print(e) paragraph_chunks = [chunk_data] self.chunk_size = chunk_data["word_count"] + self.token_count = chunk_data["token_count"] self.chunk_index += 1 diff --git a/cognee/modules/chunking/models/DocumentChunk.py b/cognee/modules/chunking/models/DocumentChunk.py index 4920e9b06..a232d50a1 100644 --- a/cognee/modules/chunking/models/DocumentChunk.py +++ b/cognee/modules/chunking/models/DocumentChunk.py @@ -12,6 +12,7 @@ class DocumentChunk(DataPoint): chunk_index: int cut_type: str is_part_of: Document + pydantic_type: str = "DocumentChunk" contains: List[Entity] = None _metadata: dict = {"index_fields": ["text"], "type": "DocumentChunk"} diff --git a/cognee/modules/cognify/config.py b/cognee/modules/cognify/config.py index d40410bfc..dd94d8b41 100644 --- a/cognee/modules/cognify/config.py +++ b/cognee/modules/cognify/config.py @@ -1,12 +1,14 @@ from functools import lru_cache from pydantic_settings import BaseSettings, SettingsConfigDict from cognee.shared.data_models import DefaultContentPrediction, SummarizedContent +from typing import Optional +import os class CognifyConfig(BaseSettings): 
classification_model: object = DefaultContentPrediction summarization_model: object = SummarizedContent - + max_tokens: Optional[int] = os.getenv("MAX_TOKENS") model_config = SettingsConfigDict(env_file=".env", extra="allow") def to_dict(self) -> dict: diff --git a/cognee/modules/data/processing/document_types/AudioDocument.py b/cognee/modules/data/processing/document_types/AudioDocument.py index a33d4e7fc..b7d2476b4 100644 --- a/cognee/modules/data/processing/document_types/AudioDocument.py +++ b/cognee/modules/data/processing/document_types/AudioDocument.py @@ -1,6 +1,9 @@ +from typing import Optional + from cognee.infrastructure.llm.get_llm_client import get_llm_client -from .Document import Document + from .ChunkerMapping import ChunkerConfig +from .Document import Document class AudioDocument(Document): @@ -10,12 +13,14 @@ class AudioDocument(Document): result = get_llm_client().create_transcript(self.raw_data_location) return result.text - def read(self, chunk_size: int, chunker: str): + def read(self, chunk_size: int, chunker: str, max_tokens: Optional[int] = None): # Transcribe the audio file text = self.create_transcript() chunker_func = ChunkerConfig.get_chunker(chunker) - chunker = chunker_func(self, chunk_size=chunk_size, get_text=lambda: [text]) + chunker = chunker_func( + self, chunk_size=chunk_size, get_text=lambda: [text], max_tokens=max_tokens + ) yield from chunker.read() diff --git a/cognee/modules/data/processing/document_types/Document.py b/cognee/modules/data/processing/document_types/Document.py index 08380e809..7ecdf289e 100644 --- a/cognee/modules/data/processing/document_types/Document.py +++ b/cognee/modules/data/processing/document_types/Document.py @@ -1,3 +1,4 @@ +from typing import Optional from uuid import UUID from cognee.infrastructure.engine import DataPoint @@ -10,5 +11,5 @@ class Document(DataPoint): mime_type: str _metadata: dict = {"index_fields": ["name"], "type": "Document"} - def read(self, chunk_size: int, chunker=str) -> str: + def read(self, chunk_size: int, chunker=str, max_tokens: Optional[int] = None) -> str: pass diff --git a/cognee/modules/data/processing/document_types/ImageDocument.py b/cognee/modules/data/processing/document_types/ImageDocument.py index 424cd059c..c055b8253 100644 --- a/cognee/modules/data/processing/document_types/ImageDocument.py +++ b/cognee/modules/data/processing/document_types/ImageDocument.py @@ -1,6 +1,9 @@ +from typing import Optional + from cognee.infrastructure.llm.get_llm_client import get_llm_client -from .Document import Document + from .ChunkerMapping import ChunkerConfig +from .Document import Document class ImageDocument(Document): @@ -10,11 +13,13 @@ class ImageDocument(Document): result = get_llm_client().transcribe_image(self.raw_data_location) return result.choices[0].message.content - def read(self, chunk_size: int, chunker: str): + def read(self, chunk_size: int, chunker: str, max_tokens: Optional[int] = None): # Transcribe the image file text = self.transcribe_image() chunker_func = ChunkerConfig.get_chunker(chunker) - chunker = chunker_func(self, chunk_size=chunk_size, get_text=lambda: [text]) + chunker = chunker_func( + self, chunk_size=chunk_size, get_text=lambda: [text], max_tokens=max_tokens + ) yield from chunker.read() diff --git a/cognee/modules/data/processing/document_types/PdfDocument.py b/cognee/modules/data/processing/document_types/PdfDocument.py index 684fb428c..768f91264 100644 --- a/cognee/modules/data/processing/document_types/PdfDocument.py +++ 
b/cognee/modules/data/processing/document_types/PdfDocument.py @@ -1,12 +1,15 @@ +from typing import Optional + from pypdf import PdfReader -from .Document import Document + from .ChunkerMapping import ChunkerConfig +from .Document import Document class PdfDocument(Document): type: str = "pdf" - def read(self, chunk_size: int, chunker: str): + def read(self, chunk_size: int, chunker: str, max_tokens: Optional[int] = None): file = PdfReader(self.raw_data_location) def get_text(): @@ -15,7 +18,9 @@ class PdfDocument(Document): yield page_text chunker_func = ChunkerConfig.get_chunker(chunker) - chunker = chunker_func(self, chunk_size=chunk_size, get_text=get_text) + chunker = chunker_func( + self, chunk_size=chunk_size, get_text=get_text, max_tokens=max_tokens + ) yield from chunker.read() diff --git a/cognee/modules/data/processing/document_types/TextDocument.py b/cognee/modules/data/processing/document_types/TextDocument.py index f993ff221..b62ccd56e 100644 --- a/cognee/modules/data/processing/document_types/TextDocument.py +++ b/cognee/modules/data/processing/document_types/TextDocument.py @@ -1,11 +1,13 @@ -from .Document import Document +from typing import Optional + from .ChunkerMapping import ChunkerConfig +from .Document import Document class TextDocument(Document): type: str = "text" - def read(self, chunk_size: int, chunker: str): + def read(self, chunk_size: int, chunker: str, max_tokens: Optional[int] = None): def get_text(): with open(self.raw_data_location, mode="r", encoding="utf-8") as file: while True: @@ -18,6 +20,8 @@ class TextDocument(Document): chunker_func = ChunkerConfig.get_chunker(chunker) - chunker = chunker_func(self, chunk_size=chunk_size, get_text=get_text) + chunker = chunker_func( + self, chunk_size=chunk_size, get_text=get_text, max_tokens=max_tokens + ) yield from chunker.read() diff --git a/cognee/modules/data/processing/document_types/UnstructuredDocument.py b/cognee/modules/data/processing/document_types/UnstructuredDocument.py index cd5c72e3b..1c291d0dc 100644 --- a/cognee/modules/data/processing/document_types/UnstructuredDocument.py +++ b/cognee/modules/data/processing/document_types/UnstructuredDocument.py @@ -1,14 +1,16 @@ from io import StringIO +from typing import Optional from cognee.modules.chunking.TextChunker import TextChunker -from .Document import Document from cognee.modules.data.exceptions import UnstructuredLibraryImportError +from .Document import Document + class UnstructuredDocument(Document): type: str = "unstructured" - def read(self, chunk_size: int): + def read(self, chunk_size: int, chunker: str, max_tokens: Optional[int] = None) -> str: def get_text(): try: from unstructured.partition.auto import partition @@ -27,6 +29,6 @@ class UnstructuredDocument(Document): yield text - chunker = TextChunker(self, chunk_size=chunk_size, get_text=get_text) + chunker = TextChunker(self, chunk_size=chunk_size, get_text=get_text, max_tokens=max_tokens) yield from chunker.read() diff --git a/cognee/modules/engine/models/Entity.py b/cognee/modules/engine/models/Entity.py index 63a153bf2..0e57d5dba 100644 --- a/cognee/modules/engine/models/Entity.py +++ b/cognee/modules/engine/models/Entity.py @@ -7,5 +7,6 @@ class Entity(DataPoint): name: str is_a: EntityType description: str + pydantic_type: str = "Entity" _metadata: dict = {"index_fields": ["name"], "type": "Entity"} diff --git a/cognee/modules/engine/models/EntityType.py b/cognee/modules/engine/models/EntityType.py index 7225bb3ae..10799bb33 100644 --- a/cognee/modules/engine/models/EntityType.py 
+++ b/cognee/modules/engine/models/EntityType.py @@ -5,5 +5,6 @@ class EntityType(DataPoint): __tablename__ = "entity_type" name: str description: str + pydantic_type: str = "EntityType" _metadata: dict = {"index_fields": ["name"], "type": "EntityType"} diff --git a/cognee/modules/retrieval/brute_force_triplet_search.py b/cognee/modules/retrieval/brute_force_triplet_search.py index fdd312480..9c778505d 100644 --- a/cognee/modules/retrieval/brute_force_triplet_search.py +++ b/cognee/modules/retrieval/brute_force_triplet_search.py @@ -43,7 +43,7 @@ def format_triplets(edges): edge_info = {key: value for key, value in edge_attributes.items() if value is not None} # Create the formatted triplet - triplet = f"Node1: {node1_info}\n" f"Edge: {edge_info}\n" f"Node2: {node2_info}\n\n\n" + triplet = f"Node1: {node1_info}\nEdge: {edge_info}\nNode2: {node2_info}\n\n\n" triplets.append(triplet) return "".join(triplets) diff --git a/cognee/modules/retrieval/description_to_codepart_search.py b/cognee/modules/retrieval/description_to_codepart_search.py index ecd187907..243fdbde3 100644 --- a/cognee/modules/retrieval/description_to_codepart_search.py +++ b/cognee/modules/retrieval/description_to_codepart_search.py @@ -8,20 +8,27 @@ from cognee.modules.graph.cognee_graph.CogneeGraph import CogneeGraph from cognee.modules.users.methods import get_default_user from cognee.modules.users.models import User from cognee.shared.utils import send_telemetry +from cognee.api.v1.search import SearchType +from cognee.api.v1.search.search_v2 import search +from cognee.infrastructure.llm.get_llm_client import get_llm_client -async def code_description_to_code_part_search(query: str, user: User = None, top_k=2) -> list: +async def code_description_to_code_part_search( + query: str, include_docs=False, user: User = None, top_k=5 +) -> list: if user is None: user = await get_default_user() if user is None: raise PermissionError("No user found in the system. Please create a user.") - retrieved_codeparts = await code_description_to_code_part(query, user, top_k) + retrieved_codeparts = await code_description_to_code_part(query, user, top_k, include_docs) return retrieved_codeparts -async def code_description_to_code_part(query: str, user: User, top_k: int) -> List[str]: +async def code_description_to_code_part( + query: str, user: User, top_k: int, include_docs: bool = False +) -> List[str]: """ Maps a code description query to relevant code parts using a CodeGraph pipeline. @@ -29,6 +36,7 @@ async def code_description_to_code_part(query: str, user: User, top_k: int) -> L query (str): The search query describing the code parts. user (User): The user performing the search. top_k (int): Number of codegraph descriptions to match ( num of corresponding codeparts will be higher) + include_docs (bool): Whether repository docs have also been ingested into the graph Returns: Set[str]: A set of unique code parts matching the query. 
@@ -55,21 +63,48 @@ async def code_description_to_code_part(query: str, user: User, top_k: int) -> L ) try: - results = await vector_engine.search("code_summary_text", query_text=query, limit=top_k) - if not results: + if include_docs: + search_results = await search(SearchType.INSIGHTS, query_text=query) + + concatenated_descriptions = " ".join( + obj["description"] + for tpl in search_results + for obj in tpl + if isinstance(obj, dict) and "description" in obj + ) + + llm_client = get_llm_client() + context_from_documents = await llm_client.acreate_structured_output( + text_input=f"The retrieved context from documents is {concatenated_descriptions}.", + system_prompt="You are a Senior Software Engineer. Summarize the context from documents" + f" so that it can be provided as context next to code parts" + f" while solving this GitHub issue connected to the project: {query}", + response_model=str, + ) + + code_summaries = await vector_engine.search( + "code_summary_text", query_text=query, limit=top_k + ) + if not code_summaries: logging.warning("No results found for query: '%s' by user: %s", query, user.id) return [] memory_fragment = CogneeGraph() await memory_fragment.project_graph_from_db( graph_engine, - node_properties_to_project=["id", "type", "text", "source_code"], + node_properties_to_project=[ + "id", + "type", + "text", + "source_code", + "pydantic_type", + ], edge_properties_to_project=["relationship_name"], ) code_pieces_to_return = set() - for node in results: + for node in code_summaries: node_id = str(node.id) node_to_search_from = memory_fragment.get_node(node_id) @@ -78,9 +113,16 @@ async def code_description_to_code_part(query: str, user: User, top_k: int) -> L continue for code_file in node_to_search_from.get_skeleton_neighbours(): - for code_file_edge in code_file.get_skeleton_edges(): - if code_file_edge.get_attribute("relationship_name") == "contains": - code_pieces_to_return.add(code_file_edge.get_destination_node()) + if code_file.get_attribute("pydantic_type") == "SourceCodeChunk": + for code_file_edge in code_file.get_skeleton_edges(): + if code_file_edge.get_attribute("relationship_name") == "code_chunk_of": + code_pieces_to_return.add(code_file_edge.get_destination_node()) + elif code_file.get_attribute("pydantic_type") == "CodePart": + code_pieces_to_return.add(code_file) + elif code_file.get_attribute("pydantic_type") == "CodeFile": + for code_file_edge in code_file.get_skeleton_edges(): + if code_file_edge.get_attribute("relationship_name") == "contains": + code_pieces_to_return.add(code_file_edge.get_destination_node()) logging.info( "Search completed for user: %s, query: '%s'. 
Found %d code pieces.", @@ -89,7 +131,14 @@ async def code_description_to_code_part(query: str, user: User, top_k: int) -> L len(code_pieces_to_return), ) - return list(code_pieces_to_return) + context = "" + for code_piece in code_pieces_to_return: + context = context + code_piece.get_attribute("source_code") + + if include_docs: + context = context_from_documents + context + + return context except Exception as exec_error: logging.error( diff --git a/cognee/shared/CodeGraphEntities.py b/cognee/shared/CodeGraphEntities.py index 164327da0..926aae9fa 100644 --- a/cognee/shared/CodeGraphEntities.py +++ b/cognee/shared/CodeGraphEntities.py @@ -5,12 +5,14 @@ from cognee.infrastructure.engine import DataPoint class Repository(DataPoint): __tablename__ = "Repository" path: str + pydantic_type: str = "Repository" _metadata: dict = {"index_fields": [], "type": "Repository"} class CodeFile(DataPoint): __tablename__ = "codefile" extracted_id: str # actually file path + pydantic_type: str = "CodeFile" source_code: Optional[str] = None part_of: Optional[Repository] = None depends_on: Optional[List["CodeFile"]] = None @@ -22,6 +24,7 @@ class CodeFile(DataPoint): class CodePart(DataPoint): __tablename__ = "codepart" # part_of: Optional[CodeFile] = None + pydantic_type: str = "CodePart" source_code: Optional[str] = None _metadata: dict = {"index_fields": [], "type": "CodePart"} @@ -30,6 +33,7 @@ class SourceCodeChunk(DataPoint): __tablename__ = "sourcecodechunk" code_chunk_of: Optional[CodePart] = None source_code: Optional[str] = None + pydantic_type: str = "SourceCodeChunk" previous_chunk: Optional["SourceCodeChunk"] = None _metadata: dict = {"index_fields": ["source_code"], "type": "SourceCodeChunk"} diff --git a/cognee/shared/data_models.py b/cognee/shared/data_models.py index d23d2841c..a36a09010 100644 --- a/cognee/shared/data_models.py +++ b/cognee/shared/data_models.py @@ -231,6 +231,7 @@ class SummarizedContent(BaseModel): summary: str description: str + pydantic_type: str = "SummarizedContent" class SummarizedFunction(BaseModel): @@ -239,6 +240,7 @@ class SummarizedFunction(BaseModel): inputs: Optional[List[str]] = None outputs: Optional[List[str]] = None decorators: Optional[List[str]] = None + pydantic_type: str = "SummarizedFunction" class SummarizedClass(BaseModel): @@ -246,6 +248,7 @@ class SummarizedClass(BaseModel): description: str methods: Optional[List[SummarizedFunction]] = None decorators: Optional[List[str]] = None + pydantic_type: str = "SummarizedClass" class SummarizedCode(BaseModel): @@ -256,6 +259,7 @@ class SummarizedCode(BaseModel): classes: List[SummarizedClass] = [] functions: List[SummarizedFunction] = [] workflow_description: Optional[str] = None + pydantic_type: str = "SummarizedCode" class GraphDBType(Enum): diff --git a/cognee/tasks/chunks/chunk_by_paragraph.py b/cognee/tasks/chunks/chunk_by_paragraph.py index 5c95e97b7..24d566074 100644 --- a/cognee/tasks/chunks/chunk_by_paragraph.py +++ b/cognee/tasks/chunks/chunk_by_paragraph.py @@ -1,10 +1,18 @@ -from uuid import uuid5, NAMESPACE_OID -from typing import Dict, Any, Iterator +from typing import Any, Dict, Iterator, Optional, Union +from uuid import NAMESPACE_OID, uuid5 + +import tiktoken + +from cognee.infrastructure.databases.vector import get_vector_engine + from .chunk_by_sentence import chunk_by_sentence def chunk_by_paragraph( - data: str, paragraph_length: int = 1024, batch_paragraphs: bool = True + data: str, + max_tokens: Optional[Union[int, float]] = None, + paragraph_length: int = 1024, + batch_paragraphs: 
bool = True, ) -> Iterator[Dict[str, Any]]: """ Chunks text by paragraph while preserving exact text reconstruction capability. @@ -15,16 +23,31 @@ def chunk_by_paragraph( chunk_index = 0 paragraph_ids = [] last_cut_type = None + current_token_count = 0 + if not max_tokens: + max_tokens = float("inf") + + vector_engine = get_vector_engine() + embedding_model = vector_engine.embedding_engine.model + embedding_model = embedding_model.split("/")[-1] for paragraph_id, sentence, word_count, end_type in chunk_by_sentence( data, maximum_length=paragraph_length ): # Check if this sentence would exceed length limit - if current_word_count > 0 and current_word_count + word_count > paragraph_length: + + tokenizer = tiktoken.encoding_for_model(embedding_model) + token_count = len(tokenizer.encode(sentence)) + + if current_word_count > 0 and ( + current_word_count + word_count > paragraph_length + or current_token_count + token_count > max_tokens + ): # Yield current chunk chunk_dict = { "text": current_chunk, "word_count": current_word_count, + "token_count": current_token_count, "chunk_id": uuid5(NAMESPACE_OID, current_chunk), "paragraph_ids": paragraph_ids, "chunk_index": chunk_index, @@ -37,11 +60,13 @@ def chunk_by_paragraph( paragraph_ids = [] current_chunk = "" current_word_count = 0 + current_token_count = 0 chunk_index += 1 paragraph_ids.append(paragraph_id) current_chunk += sentence current_word_count += word_count + current_token_count += token_count # Handle end of paragraph if end_type in ("paragraph_end", "sentence_cut") and not batch_paragraphs: @@ -49,6 +74,7 @@ def chunk_by_paragraph( chunk_dict = { "text": current_chunk, "word_count": current_word_count, + "token_count": current_token_count, "paragraph_ids": paragraph_ids, "chunk_id": uuid5(NAMESPACE_OID, current_chunk), "chunk_index": chunk_index, @@ -58,6 +84,7 @@ def chunk_by_paragraph( paragraph_ids = [] current_chunk = "" current_word_count = 0 + current_token_count = 0 chunk_index += 1 last_cut_type = end_type @@ -67,6 +94,7 @@ def chunk_by_paragraph( chunk_dict = { "text": current_chunk, "word_count": current_word_count, + "token_count": current_token_count, "chunk_id": uuid5(NAMESPACE_OID, current_chunk), "paragraph_ids": paragraph_ids, "chunk_index": chunk_index, diff --git a/cognee/tasks/documents/extract_chunks_from_documents.py b/cognee/tasks/documents/extract_chunks_from_documents.py index 437d2a3e4..5ce224002 100644 --- a/cognee/tasks/documents/extract_chunks_from_documents.py +++ b/cognee/tasks/documents/extract_chunks_from_documents.py @@ -1,9 +1,16 @@ +from typing import Optional + from cognee.modules.data.processing.document_types.Document import Document async def extract_chunks_from_documents( - documents: list[Document], chunk_size: int = 1024, chunker="text_chunker" + documents: list[Document], + chunk_size: int = 1024, + chunker="text_chunker", + max_tokens: Optional[int] = None, ): for document in documents: - for document_chunk in document.read(chunk_size=chunk_size, chunker=chunker): + for document_chunk in document.read( + chunk_size=chunk_size, chunker=chunker, max_tokens=max_tokens + ): yield document_chunk diff --git a/cognee/tasks/repo_processor/extract_code_parts.py b/cognee/tasks/repo_processor/extract_code_parts.py index f25146232..c8e478692 100644 --- a/cognee/tasks/repo_processor/extract_code_parts.py +++ b/cognee/tasks/repo_processor/extract_code_parts.py @@ -1,6 +1,5 @@ from typing import Dict, List import parso - import logging logger = logging.getLogger(__name__) diff --git 
a/cognee/tasks/repo_processor/get_local_dependencies.py b/cognee/tasks/repo_processor/get_local_dependencies.py index b0ac2829f..888f847da 100644 --- a/cognee/tasks/repo_processor/get_local_dependencies.py +++ b/cognee/tasks/repo_processor/get_local_dependencies.py @@ -9,7 +9,6 @@ import aiofiles import jedi import parso from parso.tree import BaseNode - import logging logger = logging.getLogger(__name__) diff --git a/cognee/tasks/repo_processor/get_non_code_files.py b/cognee/tasks/repo_processor/get_non_code_files.py index 9c69afd00..12f32e841 100644 --- a/cognee/tasks/repo_processor/get_non_code_files.py +++ b/cognee/tasks/repo_processor/get_non_code_files.py @@ -29,8 +29,105 @@ async def get_non_py_files(repo_path): "*.egg-info", } + ALLOWED_EXTENSIONS = { + ".txt", + ".md", + ".csv", + ".json", + ".xml", + ".yaml", + ".yml", + ".html", + ".css", + ".js", + ".ts", + ".jsx", + ".tsx", + ".sql", + ".log", + ".ini", + ".toml", + ".properties", + ".sh", + ".bash", + ".dockerfile", + ".gitignore", + ".gitattributes", + ".makefile", + ".pyproject", + ".requirements", + ".env", + ".pdf", + ".doc", + ".docx", + ".dot", + ".dotx", + ".rtf", + ".wps", + ".wpd", + ".odt", + ".ott", + ".ottx", + ".txt", + ".wp", + ".sdw", + ".sdx", + ".docm", + ".dotm", + # Additional extensions for other programming languages + ".java", + ".c", + ".cpp", + ".h", + ".cs", + ".go", + ".php", + ".rb", + ".swift", + ".pl", + ".lua", + ".rs", + ".scala", + ".kt", + ".sh", + ".sql", + ".v", + ".asm", + ".pas", + ".d", + ".ml", + ".clj", + ".cljs", + ".erl", + ".ex", + ".exs", + ".f", + ".fs", + ".r", + ".pyi", + ".pdb", + ".ipynb", + ".rmd", + ".cabal", + ".hs", + ".nim", + ".vhdl", + ".verilog", + ".svelte", + ".html", + ".css", + ".scss", + ".less", + ".json5", + ".yaml", + ".yml", + } + def should_process(path): - return not any(pattern in path for pattern in IGNORED_PATTERNS) + _, ext = os.path.splitext(path) + return ext in ALLOWED_EXTENSIONS and not any( + pattern in path for pattern in IGNORED_PATTERNS + ) non_py_files_paths = [ os.path.join(root, file) diff --git a/cognee/tasks/repo_processor/get_source_code_chunks.py b/cognee/tasks/repo_processor/get_source_code_chunks.py index 980a86539..82fa46cf0 100644 --- a/cognee/tasks/repo_processor/get_source_code_chunks.py +++ b/cognee/tasks/repo_processor/get_source_code_chunks.py @@ -5,6 +5,7 @@ from uuid import NAMESPACE_OID, uuid5 import parso import tiktoken +from cognee.infrastructure.databases.vector import get_vector_engine from cognee.infrastructure.engine import DataPoint from cognee.shared.CodeGraphEntities import CodeFile, CodePart, SourceCodeChunk @@ -126,6 +127,9 @@ def get_source_code_chunks_from_code_part( logger.error(f"No source code in CodeFile {code_file_part.id}") return + vector_engine = get_vector_engine() + embedding_model = vector_engine.embedding_engine.model + model_name = embedding_model.split("/")[-1] tokenizer = tiktoken.encoding_for_model(model_name) max_subchunk_tokens = max(1, int(granularity * max_tokens)) subchunk_token_counts = _get_subchunk_token_counts( @@ -150,7 +154,7 @@ def get_source_code_chunks_from_code_part( async def get_source_code_chunks( - data_points: list[DataPoint], embedding_model="text-embedding-3-large" + data_points: list[DataPoint], ) -> AsyncGenerator[list[DataPoint], None]: """Processes code graph datapoints, create SourceCodeChink datapoints.""" # TODO: Add support for other embedding models, with max_token mapping @@ -165,9 +169,7 @@ async def get_source_code_chunks( for code_part in data_point.contains: try: 
yield code_part - for source_code_chunk in get_source_code_chunks_from_code_part( - code_part, model_name=embedding_model - ): + for source_code_chunk in get_source_code_chunks_from_code_part(code_part): yield source_code_chunk except Exception as e: logger.error(f"Error processing code part: {e}") diff --git a/cognee/tasks/summarization/models.py b/cognee/tasks/summarization/models.py index fc62209ce..bc7b4886d 100644 --- a/cognee/tasks/summarization/models.py +++ b/cognee/tasks/summarization/models.py @@ -17,5 +17,6 @@ class CodeSummary(DataPoint): __tablename__ = "code_summary" text: str summarizes: Union[CodeFile, CodePart, SourceCodeChunk] + pydantic_type: str = "CodeSummary" _metadata: dict = {"index_fields": ["text"], "type": "CodeSummary"} diff --git a/cognee/tests/integration/documents/AudioDocument_test.py b/cognee/tests/integration/documents/AudioDocument_test.py index dbd43ddda..e07a2431b 100644 --- a/cognee/tests/integration/documents/AudioDocument_test.py +++ b/cognee/tests/integration/documents/AudioDocument_test.py @@ -36,12 +36,12 @@ def test_AudioDocument(): for ground_truth, paragraph_data in zip( GROUND_TRUTH, document.read(chunk_size=64, chunker="text_chunker") ): - assert ( - ground_truth["word_count"] == paragraph_data.word_count - ), f'{ground_truth["word_count"] = } != {paragraph_data.word_count = }' - assert ground_truth["len_text"] == len( - paragraph_data.text - ), f'{ground_truth["len_text"] = } != {len(paragraph_data.text) = }' - assert ( - ground_truth["cut_type"] == paragraph_data.cut_type - ), f'{ground_truth["cut_type"] = } != {paragraph_data.cut_type = }' + assert ground_truth["word_count"] == paragraph_data.word_count, ( + f'{ground_truth["word_count"] = } != {paragraph_data.word_count = }' + ) + assert ground_truth["len_text"] == len(paragraph_data.text), ( + f'{ground_truth["len_text"] = } != {len(paragraph_data.text) = }' + ) + assert ground_truth["cut_type"] == paragraph_data.cut_type, ( + f'{ground_truth["cut_type"] = } != {paragraph_data.cut_type = }' + ) diff --git a/cognee/tests/integration/documents/ImageDocument_test.py b/cognee/tests/integration/documents/ImageDocument_test.py index c0877ae99..b8d585419 100644 --- a/cognee/tests/integration/documents/ImageDocument_test.py +++ b/cognee/tests/integration/documents/ImageDocument_test.py @@ -25,12 +25,12 @@ def test_ImageDocument(): for ground_truth, paragraph_data in zip( GROUND_TRUTH, document.read(chunk_size=64, chunker="text_chunker") ): - assert ( - ground_truth["word_count"] == paragraph_data.word_count - ), f'{ground_truth["word_count"] = } != {paragraph_data.word_count = }' - assert ground_truth["len_text"] == len( - paragraph_data.text - ), f'{ground_truth["len_text"] = } != {len(paragraph_data.text) = }' - assert ( - ground_truth["cut_type"] == paragraph_data.cut_type - ), f'{ground_truth["cut_type"] = } != {paragraph_data.cut_type = }' + assert ground_truth["word_count"] == paragraph_data.word_count, ( + f'{ground_truth["word_count"] = } != {paragraph_data.word_count = }' + ) + assert ground_truth["len_text"] == len(paragraph_data.text), ( + f'{ground_truth["len_text"] = } != {len(paragraph_data.text) = }' + ) + assert ground_truth["cut_type"] == paragraph_data.cut_type, ( + f'{ground_truth["cut_type"] = } != {paragraph_data.cut_type = }' + ) diff --git a/cognee/tests/integration/documents/PdfDocument_test.py b/cognee/tests/integration/documents/PdfDocument_test.py index 8f28815d3..fc4307846 100644 --- a/cognee/tests/integration/documents/PdfDocument_test.py +++ 
b/cognee/tests/integration/documents/PdfDocument_test.py @@ -27,12 +27,12 @@ def test_PdfDocument(): for ground_truth, paragraph_data in zip( GROUND_TRUTH, document.read(chunk_size=1024, chunker="text_chunker") ): - assert ( - ground_truth["word_count"] == paragraph_data.word_count - ), f'{ground_truth["word_count"] = } != {paragraph_data.word_count = }' - assert ground_truth["len_text"] == len( - paragraph_data.text - ), f'{ground_truth["len_text"] = } != {len(paragraph_data.text) = }' - assert ( - ground_truth["cut_type"] == paragraph_data.cut_type - ), f'{ground_truth["cut_type"] = } != {paragraph_data.cut_type = }' + assert ground_truth["word_count"] == paragraph_data.word_count, ( + f'{ground_truth["word_count"] = } != {paragraph_data.word_count = }' + ) + assert ground_truth["len_text"] == len(paragraph_data.text), ( + f'{ground_truth["len_text"] = } != {len(paragraph_data.text) = }' + ) + assert ground_truth["cut_type"] == paragraph_data.cut_type, ( + f'{ground_truth["cut_type"] = } != {paragraph_data.cut_type = }' + ) diff --git a/cognee/tests/integration/documents/TextDocument_test.py b/cognee/tests/integration/documents/TextDocument_test.py index 1e143d563..6daec62b7 100644 --- a/cognee/tests/integration/documents/TextDocument_test.py +++ b/cognee/tests/integration/documents/TextDocument_test.py @@ -39,12 +39,12 @@ def test_TextDocument(input_file, chunk_size): for ground_truth, paragraph_data in zip( GROUND_TRUTH[input_file], document.read(chunk_size=chunk_size, chunker="text_chunker") ): - assert ( - ground_truth["word_count"] == paragraph_data.word_count - ), f'{ground_truth["word_count"] = } != {paragraph_data.word_count = }' - assert ground_truth["len_text"] == len( - paragraph_data.text - ), f'{ground_truth["len_text"] = } != {len(paragraph_data.text) = }' - assert ( - ground_truth["cut_type"] == paragraph_data.cut_type - ), f'{ground_truth["cut_type"] = } != {paragraph_data.cut_type = }' + assert ground_truth["word_count"] == paragraph_data.word_count, ( + f'{ground_truth["word_count"] = } != {paragraph_data.word_count = }' + ) + assert ground_truth["len_text"] == len(paragraph_data.text), ( + f'{ground_truth["len_text"] = } != {len(paragraph_data.text) = }' + ) + assert ground_truth["cut_type"] == paragraph_data.cut_type, ( + f'{ground_truth["cut_type"] = } != {paragraph_data.cut_type = }' + ) diff --git a/cognee/tests/integration/documents/UnstructuredDocument_test.py b/cognee/tests/integration/documents/UnstructuredDocument_test.py index 03b8deb49..773dc2293 100644 --- a/cognee/tests/integration/documents/UnstructuredDocument_test.py +++ b/cognee/tests/integration/documents/UnstructuredDocument_test.py @@ -68,35 +68,35 @@ def test_UnstructuredDocument(): ) # Test PPTX - for paragraph_data in pptx_document.read(chunk_size=1024): + for paragraph_data in pptx_document.read(chunk_size=1024, chunker="text_chunker"): assert 19 == paragraph_data.word_count, f" 19 != {paragraph_data.word_count = }" assert 104 == len(paragraph_data.text), f" 104 != {len(paragraph_data.text) = }" - assert ( - "sentence_cut" == paragraph_data.cut_type - ), f" sentence_cut != {paragraph_data.cut_type = }" + assert "sentence_cut" == paragraph_data.cut_type, ( + f" sentence_cut != {paragraph_data.cut_type = }" + ) # Test DOCX - for paragraph_data in docx_document.read(chunk_size=1024): + for paragraph_data in docx_document.read(chunk_size=1024, chunker="text_chunker"): assert 16 == paragraph_data.word_count, f" 16 != {paragraph_data.word_count = }" assert 145 == len(paragraph_data.text), f" 145 != 
{len(paragraph_data.text) = }" - assert ( - "sentence_end" == paragraph_data.cut_type - ), f" sentence_end != {paragraph_data.cut_type = }" + assert "sentence_end" == paragraph_data.cut_type, ( + f" sentence_end != {paragraph_data.cut_type = }" + ) # TEST CSV - for paragraph_data in csv_document.read(chunk_size=1024): + for paragraph_data in csv_document.read(chunk_size=1024, chunker="text_chunker"): assert 15 == paragraph_data.word_count, f" 15 != {paragraph_data.word_count = }" - assert ( - "A A A A A A A A A,A A A A A A,A A" == paragraph_data.text - ), f"Read text doesn't match expected text: {paragraph_data.text}" - assert ( - "sentence_cut" == paragraph_data.cut_type - ), f" sentence_cut != {paragraph_data.cut_type = }" + assert "A A A A A A A A A,A A A A A A,A A" == paragraph_data.text, ( + f"Read text doesn't match expected text: {paragraph_data.text}" + ) + assert "sentence_cut" == paragraph_data.cut_type, ( + f" sentence_cut != {paragraph_data.cut_type = }" + ) # Test XLSX - for paragraph_data in xlsx_document.read(chunk_size=1024): + for paragraph_data in xlsx_document.read(chunk_size=1024, chunker="text_chunker"): assert 36 == paragraph_data.word_count, f" 36 != {paragraph_data.word_count = }" assert 171 == len(paragraph_data.text), f" 171 != {len(paragraph_data.text) = }" - assert ( - "sentence_cut" == paragraph_data.cut_type - ), f" sentence_cut != {paragraph_data.cut_type = }" + assert "sentence_cut" == paragraph_data.cut_type, ( + f" sentence_cut != {paragraph_data.cut_type = }" + ) diff --git a/cognee/tests/test_deduplication.py b/cognee/tests/test_deduplication.py index 9c2df032d..89c866f12 100644 --- a/cognee/tests/test_deduplication.py +++ b/cognee/tests/test_deduplication.py @@ -30,9 +30,9 @@ async def test_deduplication(): result = await relational_engine.get_all_data_from_table("data") assert len(result) == 1, "More than one data entity was found." - assert ( - result[0]["name"] == "Natural_language_processing_copy" - ), "Result name does not match expected value." + assert result[0]["name"] == "Natural_language_processing_copy", ( + "Result name does not match expected value." + ) result = await relational_engine.get_all_data_from_table("datasets") assert len(result) == 2, "Unexpected number of datasets found." @@ -61,9 +61,9 @@ async def test_deduplication(): result = await relational_engine.get_all_data_from_table("data") assert len(result) == 1, "More than one data entity was found." - assert ( - hashlib.md5(text.encode("utf-8")).hexdigest() in result[0]["name"] - ), "Content hash is not a part of file name." + assert hashlib.md5(text.encode("utf-8")).hexdigest() in result[0]["name"], ( + "Content hash is not a part of file name." 
+ ) await cognee.prune.prune_data() await cognee.prune.prune_system(metadata=True) diff --git a/cognee/tests/test_falkordb.py b/cognee/tests/test_falkordb.py index 07ece9eb2..af0e87916 100755 --- a/cognee/tests/test_falkordb.py +++ b/cognee/tests/test_falkordb.py @@ -85,9 +85,9 @@ async def main(): from cognee.infrastructure.databases.relational import get_relational_engine - assert not os.path.exists( - get_relational_engine().db_path - ), "SQLite relational database is not empty" + assert not os.path.exists(get_relational_engine().db_path), ( + "SQLite relational database is not empty" + ) from cognee.infrastructure.databases.graph import get_graph_config diff --git a/cognee/tests/test_library.py b/cognee/tests/test_library.py index 8352b4161..192b67506 100755 --- a/cognee/tests/test_library.py +++ b/cognee/tests/test_library.py @@ -82,9 +82,9 @@ async def main(): from cognee.infrastructure.databases.relational import get_relational_engine - assert not os.path.exists( - get_relational_engine().db_path - ), "SQLite relational database is not empty" + assert not os.path.exists(get_relational_engine().db_path), ( + "SQLite relational database is not empty" + ) from cognee.infrastructure.databases.graph import get_graph_config diff --git a/cognee/tests/test_pgvector.py b/cognee/tests/test_pgvector.py index c241177f0..73b6be974 100644 --- a/cognee/tests/test_pgvector.py +++ b/cognee/tests/test_pgvector.py @@ -24,28 +24,28 @@ async def test_local_file_deletion(data_text, file_location): data_hash = hashlib.md5(encoded_text).hexdigest() # Get data entry from database based on hash contents data = (await session.scalars(select(Data).where(Data.content_hash == data_hash))).one() - assert os.path.isfile( - data.raw_data_location - ), f"Data location doesn't exist: {data.raw_data_location}" + assert os.path.isfile(data.raw_data_location), ( + f"Data location doesn't exist: {data.raw_data_location}" + ) # Test deletion of data along with local files created by cognee await engine.delete_data_entity(data.id) - assert not os.path.exists( - data.raw_data_location - ), f"Data location still exists after deletion: {data.raw_data_location}" + assert not os.path.exists(data.raw_data_location), ( + f"Data location still exists after deletion: {data.raw_data_location}" + ) async with engine.get_async_session() as session: # Get data entry from database based on file path data = ( await session.scalars(select(Data).where(Data.raw_data_location == file_location)) ).one() - assert os.path.isfile( - data.raw_data_location - ), f"Data location doesn't exist: {data.raw_data_location}" + assert os.path.isfile(data.raw_data_location), ( + f"Data location doesn't exist: {data.raw_data_location}" + ) # Test local files not created by cognee won't get deleted await engine.delete_data_entity(data.id) - assert os.path.exists( - data.raw_data_location - ), f"Data location doesn't exists: {data.raw_data_location}" + assert os.path.exists(data.raw_data_location), ( + f"Data location doesn't exists: {data.raw_data_location}" + ) async def test_getting_of_documents(dataset_name_1): @@ -54,16 +54,16 @@ async def test_getting_of_documents(dataset_name_1): user = await get_default_user() document_ids = await get_document_ids_for_user(user.id, [dataset_name_1]) - assert ( - len(document_ids) == 1 - ), f"Number of expected documents doesn't match {len(document_ids)} != 1" + assert len(document_ids) == 1, ( + f"Number of expected documents doesn't match {len(document_ids)} != 1" + ) # Test getting of documents for search when no 
dataset is provided user = await get_default_user() document_ids = await get_document_ids_for_user(user.id) - assert ( - len(document_ids) == 2 - ), f"Number of expected documents doesn't match {len(document_ids)} != 2" + assert len(document_ids) == 2, ( + f"Number of expected documents doesn't match {len(document_ids)} != 2" + ) async def main(): diff --git a/cognee/tests/unit/processing/chunks/chunk_by_paragraph_2_test.py b/cognee/tests/unit/processing/chunks/chunk_by_paragraph_2_test.py index 728b5cda4..d8680a604 100644 --- a/cognee/tests/unit/processing/chunks/chunk_by_paragraph_2_test.py +++ b/cognee/tests/unit/processing/chunks/chunk_by_paragraph_2_test.py @@ -17,9 +17,9 @@ batch_paragraphs_vals = [True, False] def test_chunk_by_paragraph_isomorphism(input_text, paragraph_length, batch_paragraphs): chunks = chunk_by_paragraph(input_text, paragraph_length, batch_paragraphs) reconstructed_text = "".join([chunk["text"] for chunk in chunks]) - assert ( - reconstructed_text == input_text - ), f"texts are not identical: {len(input_text) = }, {len(reconstructed_text) = }" + assert reconstructed_text == input_text, ( + f"texts are not identical: {len(input_text) = }, {len(reconstructed_text) = }" + ) @pytest.mark.parametrize( @@ -27,14 +27,18 @@ def test_chunk_by_paragraph_isomorphism(input_text, paragraph_length, batch_para list(product(list(INPUT_TEXTS.values()), paragraph_lengths, batch_paragraphs_vals)), ) def test_paragraph_chunk_length(input_text, paragraph_length, batch_paragraphs): - chunks = list(chunk_by_paragraph(input_text, paragraph_length, batch_paragraphs)) + chunks = list( + chunk_by_paragraph( + data=input_text, paragraph_length=paragraph_length, batch_paragraphs=batch_paragraphs + ) + ) chunk_lengths = np.array([len(list(chunk_by_word(chunk["text"]))) for chunk in chunks]) larger_chunks = chunk_lengths[chunk_lengths > paragraph_length] - assert np.all( - chunk_lengths <= paragraph_length - ), f"{paragraph_length = }: {larger_chunks} are too large" + assert np.all(chunk_lengths <= paragraph_length), ( + f"{paragraph_length = }: {larger_chunks} are too large" + ) @pytest.mark.parametrize( @@ -42,8 +46,10 @@ def test_paragraph_chunk_length(input_text, paragraph_length, batch_paragraphs): list(product(list(INPUT_TEXTS.values()), paragraph_lengths, batch_paragraphs_vals)), ) def test_chunk_by_paragraph_chunk_numbering(input_text, paragraph_length, batch_paragraphs): - chunks = chunk_by_paragraph(input_text, paragraph_length, batch_paragraphs) + chunks = chunk_by_paragraph( + data=input_text, paragraph_length=paragraph_length, batch_paragraphs=batch_paragraphs + ) chunk_indices = np.array([chunk["chunk_index"] for chunk in chunks]) - assert np.all( - chunk_indices == np.arange(len(chunk_indices)) - ), f"{chunk_indices = } are not monotonically increasing" + assert np.all(chunk_indices == np.arange(len(chunk_indices))), ( + f"{chunk_indices = } are not monotonically increasing" + ) diff --git a/cognee/tests/unit/processing/chunks/chunk_by_paragraph_test.py b/cognee/tests/unit/processing/chunks/chunk_by_paragraph_test.py index 3ddc6f4f5..e420b2e9f 100644 --- a/cognee/tests/unit/processing/chunks/chunk_by_paragraph_test.py +++ b/cognee/tests/unit/processing/chunks/chunk_by_paragraph_test.py @@ -49,16 +49,18 @@ Third paragraph is cut and is missing the dot at the end""", def run_chunking_test(test_text, expected_chunks): chunks = [] - for chunk_data in chunk_by_paragraph(test_text, 12, batch_paragraphs=False): + for chunk_data in chunk_by_paragraph( + data=test_text, 
paragraph_length=12, batch_paragraphs=False + ): chunks.append(chunk_data) assert len(chunks) == 3 for expected_chunks_item, chunk in zip(expected_chunks, chunks): for key in ["text", "word_count", "cut_type"]: - assert ( - chunk[key] == expected_chunks_item[key] - ), f"{key = }: {chunk[key] = } != {expected_chunks_item[key] = }" + assert chunk[key] == expected_chunks_item[key], ( + f"{key = }: {chunk[key] = } != {expected_chunks_item[key] = }" + ) def test_chunking_whole_text(): diff --git a/cognee/tests/unit/processing/chunks/chunk_by_sentence_test.py b/cognee/tests/unit/processing/chunks/chunk_by_sentence_test.py index d1c75d7ed..efa053077 100644 --- a/cognee/tests/unit/processing/chunks/chunk_by_sentence_test.py +++ b/cognee/tests/unit/processing/chunks/chunk_by_sentence_test.py @@ -16,9 +16,9 @@ maximum_length_vals = [None, 8, 64] def test_chunk_by_sentence_isomorphism(input_text, maximum_length): chunks = chunk_by_sentence(input_text, maximum_length) reconstructed_text = "".join([chunk[1] for chunk in chunks]) - assert ( - reconstructed_text == input_text - ), f"texts are not identical: {len(input_text) = }, {len(reconstructed_text) = }" + assert reconstructed_text == input_text, ( + f"texts are not identical: {len(input_text) = }, {len(reconstructed_text) = }" + ) @pytest.mark.parametrize( @@ -36,6 +36,6 @@ def test_paragraph_chunk_length(input_text, maximum_length): chunk_lengths = np.array([len(list(chunk_by_word(chunk[1]))) for chunk in chunks]) larger_chunks = chunk_lengths[chunk_lengths > maximum_length] - assert np.all( - chunk_lengths <= maximum_length - ), f"{maximum_length = }: {larger_chunks} are too large" + assert np.all(chunk_lengths <= maximum_length), ( + f"{maximum_length = }: {larger_chunks} are too large" + ) diff --git a/cognee/tests/unit/processing/chunks/chunk_by_word_test.py b/cognee/tests/unit/processing/chunks/chunk_by_word_test.py index fb26638cb..d79fcdbc8 100644 --- a/cognee/tests/unit/processing/chunks/chunk_by_word_test.py +++ b/cognee/tests/unit/processing/chunks/chunk_by_word_test.py @@ -17,9 +17,9 @@ from cognee.tests.unit.processing.chunks.test_input import INPUT_TEXTS def test_chunk_by_word_isomorphism(input_text): chunks = chunk_by_word(input_text) reconstructed_text = "".join([chunk[0] for chunk in chunks]) - assert ( - reconstructed_text == input_text - ), f"texts are not identical: {len(input_text) = }, {len(reconstructed_text) = }" + assert reconstructed_text == input_text, ( + f"texts are not identical: {len(input_text) = }, {len(reconstructed_text) = }" + ) @pytest.mark.parametrize( diff --git a/evals/eval_swe_bench.py b/evals/eval_swe_bench.py index 789c95ab4..a8b4c8a1d 100644 --- a/evals/eval_swe_bench.py +++ b/evals/eval_swe_bench.py @@ -11,8 +11,9 @@ from cognee.api.v1.cognify.code_graph_pipeline import run_code_graph_pipeline from cognee.api.v1.search import SearchType from cognee.infrastructure.llm.get_llm_client import get_llm_client from cognee.infrastructure.llm.prompts import read_query_prompt -from cognee.modules.retrieval.brute_force_triplet_search import brute_force_triplet_search -from cognee.shared.utils import render_graph +from cognee.modules.retrieval.description_to_codepart_search import ( + code_description_to_code_part_search, +) from evals.eval_utils import download_github_repo, retrieved_edges_to_string @@ -32,25 +33,18 @@ def check_install_package(package_name): return False -async def generate_patch_with_cognee(instance, llm_client, search_type=SearchType.CHUNKS): +async def generate_patch_with_cognee(instance): 
repo_path = download_github_repo(instance, "../RAW_GIT_REPOS") - pipeline = await run_code_graph_pipeline(repo_path) - - async for result in pipeline: - print(result) - - print("Here we have the repo under the repo_path") - - await render_graph(None, include_labels=True, include_nodes=True) - + include_docs = True problem_statement = instance["problem_statement"] instructions = read_query_prompt("patch_gen_kg_instructions.txt") - retrieved_edges = await brute_force_triplet_search( - problem_statement, top_k=3, collections=["data_point_source_code", "data_point_text"] - ) + async for result in run_code_graph_pipeline(repo_path, include_docs=include_docs): + print(result) - retrieved_edges_str = retrieved_edges_to_string(retrieved_edges) + retrieved_codeparts = await code_description_to_code_part_search( + problem_statement, include_docs=include_docs + ) prompt = "\n".join( [ @@ -58,8 +52,8 @@ async def generate_patch_with_cognee(instance, llm_client, search_type=SearchTyp "", PATCH_EXAMPLE, "", - "These are the retrieved edges:", - retrieved_edges_str, + "This is the additional context to solve the problem (description from documentation together with codeparts):", + retrieved_codeparts, ] ) @@ -85,8 +79,6 @@ async def generate_patch_without_cognee(instance, llm_client): async def get_preds(dataset, with_cognee=True): - llm_client = get_llm_client() - if with_cognee: model_name = "with_cognee" pred_func = generate_patch_with_cognee @@ -94,17 +86,18 @@ async def get_preds(dataset, with_cognee=True): model_name = "without_cognee" pred_func = generate_patch_without_cognee - futures = [(instance["instance_id"], pred_func(instance, llm_client)) for instance in dataset] - model_patches = await asyncio.gather(*[x[1] for x in futures]) + preds = [] - preds = [ - { - "instance_id": instance_id, - "model_patch": model_patch, - "model_name_or_path": model_name, - } - for (instance_id, _), model_patch in zip(futures, model_patches) - ] + for instance in dataset: + instance_id = instance["instance_id"] + model_patch = await pred_func(instance) # Sequentially await the async function + preds.append( + { + "instance_id": instance_id, + "model_patch": model_patch, + "model_name_or_path": model_name, + } + ) return preds @@ -134,6 +127,7 @@ async def main(): with open(predictions_path, "w") as file: json.dump(preds, file) + """ This part is for the evaluation subprocess.run( [ "python", @@ -151,6 +145,7 @@ async def main(): "test_run", ] ) + """ if __name__ == "__main__": diff --git a/examples/python/code_graph_example.py b/examples/python/code_graph_example.py index c90a0b606..59229344f 100644 --- a/examples/python/code_graph_example.py +++ b/examples/python/code_graph_example.py @@ -1,7 +1,9 @@ import argparse import asyncio +import logging from cognee.api.v1.cognify.code_graph_pipeline import run_code_graph_pipeline +from cognee.shared.utils import setup_logging async def main(repo_path, include_docs): @@ -9,7 +11,7 @@ async def main(repo_path, include_docs): print(result) -if __name__ == "__main__": +def parse_args(): parser = argparse.ArgumentParser() parser.add_argument("--repo_path", type=str, required=True, help="Path to the repository") parser.add_argument( @@ -18,5 +20,28 @@ if __name__ == "__main__": default=True, help="Whether or not to process non-code files", ) - args = parser.parse_args() - asyncio.run(main(args.repo_path, args.include_docs)) + parser.add_argument( + "--time", + type=lambda x: x.lower() in ("true", "1"), + default=True, + help="Whether or not to time the pipeline run", + ) + 
return parser.parse_args() + + +if __name__ == "__main__": + setup_logging(logging.ERROR) + + args = parse_args() + + if args.time: + import time + + start_time = time.time() + asyncio.run(main(args.repo_path, args.include_docs)) + end_time = time.time() + print("\n" + "=" * 50) + print(f"Pipeline Execution Time: {end_time - start_time:.2f} seconds") + print("=" * 50 + "\n") + else: + asyncio.run(main(args.repo_path, args.include_docs))
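
For reference, below is a minimal usage sketch of the token-aware chunking path introduced in this diff, assuming the module layout shown above. The chunk_documents helper and the fallback token budget are hypothetical; they only illustrate how max_tokens flows from CognifyConfig through extract_chunks_from_documents and Document.read() into the chunker. The documents argument is assumed to be a list of Document instances such as those produced by the classify_documents task, and a configured embedding engine is still required at runtime because chunk_by_paragraph derives its tokenizer from it.

from typing import Optional

from cognee.modules.cognify.config import get_cognify_config
from cognee.tasks.documents.extract_chunks_from_documents import extract_chunks_from_documents


async def chunk_documents(documents, chunk_size: int = 1024, fallback_max_tokens: Optional[int] = 8192):
    # max_tokens now comes from the MAX_TOKENS environment variable via CognifyConfig;
    # fall back to a hypothetical budget when it is unset.
    cognify_config = get_cognify_config()
    max_tokens = cognify_config.max_tokens or fallback_max_tokens

    chunks = []
    # extract_chunks_from_documents forwards max_tokens through Document.read() to the
    # chunker, which enforces both the word-count and the token-count limit per chunk.
    async for chunk in extract_chunks_from_documents(
        documents, chunk_size=chunk_size, chunker="text_chunker", max_tokens=max_tokens
    ):
        chunks.append(chunk)
    return chunks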