Merge branch 'main' of github.com:topoteretes/cognee into COG-334-structure-routing

Commit ddf495266b by Igor Ilic, 2024-11-05 22:53:33 +01:00
30 changed files with 1661 additions and 1329 deletions

.github/workflows/auto-comment.yml (new file)

@@ -0,0 +1,81 @@
name: Issue and PR Auto Comments
on:
issues:
types:
- opened
- closed
- assigned
pull_request_target:
types:
- opened
- closed
permissions:
contents: read
jobs:
auto-comment:
permissions:
issues: write
pull-requests: write
runs-on: ubuntu-latest
steps:
# configuration for auto-comment actions
- name: Configure Auto Comments
uses: wow-actions/auto-comment@v1
with:
GITHUB_TOKEN: ${{ secrets.GH_TOKEN }}
issuesOpened: |
👋 @{{ author }}
Thank you for raising an issue. We will investigate the matter and get back to you as soon as possible.
To help us address your issue efficiently, please ensure you have provided:
- A clear description of the problem
- Steps to reproduce (if applicable)
- Expected vs actual behavior
- Any relevant screenshots or error messages
Our team typically responds within 2-3 business days.
issuesClosed: |
✅ @{{ author }}
This issue has been closed. If you have any further questions or if the issue resurfaces,
please feel free to:
- Add a comment to this thread
- Open a new issue with reference to this one
Thank you for helping us improve!
pullRequestOpened: |
👍 @{{ author }}
Thank you for your pull request and contributing to our community!
Please ensure you have:
- [ ] Followed our contributing guidelines
- [ ] Added/updated tests (if applicable)
- [ ] Updated documentation (if applicable)
- [ ] Added a descriptive PR title
Our team will review your contribution as soon as possible. Feel free to reach out if you need any assistance.
# Separate action for merged PRs
- name: Handle Merged Pull Requests
if: github.event.pull_request.merged == true
uses: actions-cool/pr-welcome@v1.2.1
with:
token: ${{ secrets.GH_TOKEN }}
comment: |
🎉 Fantastic work @${{ github.event.pull_request.user.login }}! 🎉
Your pull request has been merged successfully. Thank you for your valuable contribution!
We appreciate the time and effort you've put into improving our project.
Your changes will be included in our next release.
Keep up the great work! 💪
emoji: 'rocket'
pr-emoji: '+1, heart, rocket'


@@ -20,10 +20,16 @@ jobs:
uses: docker/setup-buildx-action@v3
- name: Build Docker images
env:
ENVIRONMENT: dev
ENV: dev
run: |
docker compose -f docker-compose.yml build
- name: Run Docker Compose
env:
ENVIRONMENT: dev
ENV: dev
run: |
docker compose -f docker-compose.yml up -d

.github/workflows/test_notebook.yml (new file)

@@ -0,0 +1,61 @@
name: test | notebook
on:
pull_request:
branches:
- main
workflow_dispatch:
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
env:
RUNTIME__LOG_LEVEL: ERROR
jobs:
get_docs_changes:
name: docs changes
uses: ./.github/workflows/get_docs_changes.yml
run_notebook_test:
name: test
needs: get_docs_changes
if: needs.get_docs_changes.outputs.changes_outside_docs == 'true'
runs-on: ubuntu-latest
defaults:
run:
shell: bash
steps:
- name: Check out
uses: actions/checkout@master
- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: '3.11.x'
- name: Install Poetry
uses: snok/install-poetry@v1.3.2
with:
virtualenvs-create: true
virtualenvs-in-project: true
installer-parallel: true
- name: Install dependencies
run: |
poetry install --no-interaction
poetry add jupyter --no-interaction
- name: Execute Jupyter Notebook
env:
ENV: 'dev'
LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
GRAPHISTRY_USERNAME: ${{ secrets.GRAPHISTRY_USERNAME }}
GRAPHISTRY_PASSWORD: ${{ secrets.GRAPHISTRY_PASSWORD }}
run: |
poetry run jupyter nbconvert \
--to notebook \
--execute notebooks/cognee_demo.ipynb \
--output executed_notebook.ipynb \
--ExecutePreprocessor.timeout=1200
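For local debugging, the same execution step can be reproduced without the workflow runner via nbconvert's Python API. A minimal sketch, assuming `jupyter` is installed (as the workflow does with `poetry add jupyter`) and the same environment variables are exported:

```python
# Sketch: local reproduction of the CI notebook-execution step above.
import nbformat
from nbconvert.preprocessors import ExecutePreprocessor

nb = nbformat.read("notebooks/cognee_demo.ipynb", as_version=4)
executor = ExecutePreprocessor(timeout=1200, kernel_name="python3")
executor.preprocess(nb, {"metadata": {"path": "notebooks/"}})  # execute all cells in order
nbformat.write(nb, "executed_notebook.ipynb")
```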


@@ -25,7 +25,7 @@ RUN pip install poetry
RUN poetry config virtualenvs.create false
# Install the dependencies
RUN poetry install --no-root --no-dev
RUN poetry install --all-extras --no-root --no-dev
# Set the PYTHONPATH environment variable to include the /app directory


@@ -29,6 +29,10 @@ If you have questions, join our <a href="https://discord.gg/NQPKmU5CCg">Discord
pip install cognee
```
### With pip with PostgreSQL support
```bash
pip install cognee[postgres]
```
### With poetry
@@ -36,6 +40,11 @@ pip install cognee
poetry add cognee
```
### With poetry with PostgreSQL support
```bash
poetry add cognee -E postgres
```
## 💻 Basic Usage
@@ -50,7 +59,7 @@ os.environ["LLM_API_KEY"] = "YOUR OPENAI_API_KEY"
or
```python
import cognee
cognee.config.llm_api_key = "YOUR_OPENAI_API_KEY"
cognee.config.set_llm_api_key("YOUR_OPENAI_API_KEY")
```
You can also set the variables by creating a .env file; here is our <a href="https://github.com/topoteretes/cognee/blob/main/.env.template">template</a>.
To use different LLM providers, check out our <a href="https://topoteretes.github.io/cognee">documentation</a> for more info.
@@ -73,26 +82,54 @@ docker-compose up
```
Then navigate to localhost:3000
If you want to use the UI with PostgreSQL through docker-compose, make sure to set the following values in the .env file:
```
DB_PROVIDER=postgres
DB_HOST=postgres
DB_PORT=5432
DB_NAME=cognee_db
DB_USERNAME=cognee
DB_PASSWORD=cognee
```
### Simple example
Run the default cognee pipeline:
First, copy `.env.template` to `.env` and add your OpenAI API key to the LLM_API_KEY field.
Optionally, set `VECTOR_DB_PROVIDER="lancedb"` in `.env` to simplify setup.
This script will run the default pipeline:
```python
import cognee
import asyncio
from cognee.api.v1.search import SearchType
text = """Natural language processing (NLP) is an interdisciplinary
subfield of computer science and information retrieval"""
async def main():
await cognee.prune.prune_data() # Reset cognee data
await cognee.prune.prune_system(metadata=True) # Reset cognee system state
await cognee.add(text) # Add a new piece of information
text = """
Natural language processing (NLP) is an interdisciplinary
subfield of computer science and information retrieval.
"""
await cognee.cognify() # Use LLMs and cognee to create a knowledge graph
await cognee.add(text) # Add text to cognee
await cognee.cognify() # Use LLMs and cognee to create knowledge graph
search_results = await cognee.search("INSIGHTS", {'query': 'NLP'}) # Query cognee for the insights
search_results = await cognee.search( # Search cognee for insights
SearchType.INSIGHTS,
{'query': 'Tell me about NLP'}
)
for result in search_results:
do_something_with_result(result)
for result_text in search_results: # Display results
print(result_text)
asyncio.run(main())
```
A version of this example is available here: `examples/python/simple_example.py`
### Create your own memory store


@@ -5,6 +5,7 @@ from cognee.modules.cognify.config import get_cognify_config
from cognee.infrastructure.data.chunking.config import get_chunk_config
from cognee.infrastructure.databases.vector import get_vectordb_config
from cognee.infrastructure.databases.graph.config import get_graph_config
from cognee.infrastructure.llm.config import get_llm_config
from cognee.infrastructure.databases.relational import get_relational_config
from cognee.infrastructure.files.storage import LocalStorage
@@ -55,19 +56,36 @@ class config():
graph_config.graph_database_provider = graph_database_provider
@staticmethod
def llm_provider(llm_provider: str):
graph_config = get_graph_config()
graph_config.llm_provider = llm_provider
def set_llm_provider(llm_provider: str):
llm_config = get_llm_config()
llm_config.llm_provider = llm_provider
@staticmethod
def llm_endpoint(llm_endpoint: str):
graph_config = get_graph_config()
graph_config.llm_endpoint = llm_endpoint
def set_llm_endpoint(llm_endpoint: str):
llm_config = get_llm_config()
llm_config.llm_endpoint = llm_endpoint
@staticmethod
def llm_model(llm_model: str):
graph_config = get_graph_config()
graph_config.llm_model = llm_model
def set_llm_model(llm_model: str):
llm_config = get_llm_config()
llm_config.llm_model = llm_model
@staticmethod
def set_llm_api_key(llm_api_key: str):
llm_config = get_llm_config()
llm_config.llm_api_key = llm_api_key
@staticmethod
def set_llm_config(config_dict: dict):
"""
Updates the llm config with values from config_dict.
"""
llm_config = get_llm_config()
for key, value in config_dict.items():
if hasattr(llm_config, key):
object.__setattr__(llm_config, key, value)
else:
raise AttributeError(f"'{key}' is not a valid attribute of the config.")
@staticmethod
def set_chunk_strategy(chunk_strategy: object):
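A usage sketch for the new `set_llm_config` helper: the keys mirror the `llm_config` attributes assigned by the individual setters above, and the values are placeholders, not recommendations:

```python
import cognee

# Update several LLM settings in one call. Keys must match attributes on
# the llm config object; unknown keys raise AttributeError (see above).
cognee.config.set_llm_config({
    "llm_provider": "openai",          # placeholder values
    "llm_model": "gpt-4o-mini",
    "llm_api_key": "YOUR_OPENAI_API_KEY",
})
```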
@@ -137,5 +155,5 @@ class config():
if "username" not in graphistry_config or "password" not in graphistry_config:
raise ValueError("graphistry_config dictionary must contain 'username' and 'password' keys.")
base_config.graphistry_username = graphistry_config.username
base_config.graphistry_password = graphistry_config.password
base_config.graphistry_username = graphistry_config.get("username")
base_config.graphistry_password = graphistry_config.get("password")
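This fixes a genuine bug: `graphistry_config` is a plain dict, so attribute access raises `AttributeError`, and `dict.get` is the appropriate lookup. In two lines:

```python
graphistry_config = {"username": "user", "password": "pass"}
print(graphistry_config.get("username"))  # "user" -- dict lookup works
# graphistry_config.username              # AttributeError: 'dict' object has no attribute 'username'
```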


@@ -1,25 +0,0 @@
from typing import List, Optional
from fastembed import TextEmbedding
from cognee.root_dir import get_absolute_path
from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine
class FastembedEmbeddingEngine(EmbeddingEngine):
embedding_model: str
embedding_dimensions: int
def __init__(
self,
embedding_model: Optional[str] = "BAAI/bge-large-en-v1.5",
embedding_dimensions: Optional[int] = 1024,
):
self.embedding_model = embedding_model
self.embedding_dimensions = embedding_dimensions
async def embed_text(self, text: List[str]) -> List[float]:
embedding_model = TextEmbedding(model_name = self.embedding_model, cache_dir = get_absolute_path("cache/embeddings"))
embeddings_list = list(map(lambda embedding: embedding.tolist(), embedding_model.embed(text)))
return embeddings_list
def get_vector_size(self) -> int:
return self.embedding_dimensions


@@ -164,7 +164,10 @@ class LanceDBAdapter(VectorDBInterface):
async def delete_data_points(self, collection_name: str, data_point_ids: list[str]):
connection = await self.get_connection()
collection = await connection.open_table(collection_name)
results = await collection.delete(f"id IN {tuple(data_point_ids)}")
if len(data_point_ids) == 1:
results = await collection.delete(f"id = '{data_point_ids[0]}'")
else:
results = await collection.delete(f"id IN {tuple(data_point_ids)}")
return results
async def prune(self):
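The single-id branch is needed because Python formats a one-element tuple with a trailing comma, which is invalid in the SQL-style filter LanceDB parses. A quick illustration:

```python
ids = ["a1"]
print(f"id IN {tuple(ids)}")  # id IN ('a1',)  -- trailing comma, invalid filter
print(f"id = '{ids[0]}'")     # id = 'a1'      -- valid for a single id

ids = ["a1", "b2"]
print(f"id IN {tuple(ids)}")  # id IN ('a1', 'b2')  -- valid
```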


@@ -1,7 +1,8 @@
from typing import BinaryIO
from pypdf import PdfReader
import filetype
def extract_text_from_file(file: BinaryIO, file_type) -> str:
def extract_text_from_file(file: BinaryIO, file_type: filetype.Type) -> str:
"""Extract text from a file"""
if file_type.extension == "pdf":
reader = PdfReader(stream = file)
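The new annotation comes from the `filetype` package, whose `guess()` helper inspects a file's magic bytes and returns a `filetype.Type` with `extension` and `mime` attributes. A short sketch (the path is illustrative):

```python
import filetype

kind = filetype.guess("document.pdf")  # illustrative path; also accepts bytes or a file object
if kind is not None:
    print(kind.extension)  # "pdf"
    print(kind.mime)       # "application/pdf"
```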


@@ -1,5 +0,0 @@
import os
def get_file_size(file_path: str):
"""Get the size of a file"""
return os.path.getsize(file_path)


@@ -1,4 +1,3 @@
import dsp
import dspy
from dspy.evaluate.evaluate import Evaluate
from dspy.primitives.example import Example


@@ -1,4 +1,3 @@
import dsp
import dspy
from dspy.teleprompt import BootstrapFewShot
from dspy.primitives.example import Example


@@ -5,7 +5,7 @@ from .models.Task import Task
class PipelineConfig(BaseModel):
batch_count: int = 10
description: Optional[str]
description: Optional[str] = None
class Pipeline():
id: UUID = uuid4()


@@ -1,8 +1,8 @@
from typing import Any, Callable, Generator
from typing import Any, Callable, Generator, List
import asyncio
from ..tasks.Task import Task
def run_tasks_parallel(tasks: [Task]) -> Callable[[Any], Generator[Any, Any, Any]]:
def run_tasks_parallel(tasks: List[Task]) -> Callable[[Any], Generator[Any, Any, Any]]:
async def parallel_run(*args, **kwargs):
parallel_tasks = [asyncio.create_task(task.run(*args, **kwargs)) for task in tasks]
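The old `[Task]` is a list literal, not a type expression, so static checkers reject it; `List[Task]` (or the builtin `list[Task]` on Python 3.9+) is the idiomatic annotation. A minimal contrast:

```python
from typing import List

def broken(tasks: [int]) -> None:     # evaluates at import time, but type checkers
    pass                              # reject a list literal used as a type

def fixed(tasks: List[int]) -> None:  # the form the change above adopts
    pass
```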


@@ -18,7 +18,7 @@ class Directory(BaseModel):
directories: List['Directory'] = []
# Allows recursive Directory Model
Directory.update_forward_refs()
Directory.model_rebuild()
class RepositoryProperties(BaseModel):
custom_properties: Optional[Dict[str, Any]] = None
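`update_forward_refs()` was the Pydantic v1 way to resolve self-references; v2 renames it to `model_rebuild()`. A minimal sketch of the recursive-model pattern used here:

```python
from typing import List
from pydantic import BaseModel

class Directory(BaseModel):
    name: str
    directories: List["Directory"] = []  # self-referencing field

Directory.model_rebuild()  # Pydantic v2 replacement for update_forward_refs()

root = Directory(name="root", directories=[{"name": "child"}])
print(root.directories[0].name)  # child
```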


@@ -6,15 +6,15 @@ class BaseClass(BaseModel):
name: str
type: Literal["Class"] = "Class"
description: str
constructor_parameters: Optional[List[str]]
constructor_parameters: Optional[List[str]] = None
class Class(BaseModel):
id: str
name: str
type: Literal["Class"] = "Class"
description: str
constructor_parameters: Optional[List[str]]
from_class: Optional[BaseClass]
constructor_parameters: Optional[List[str]] = None
from_class: Optional[BaseClass] = None
class ClassInstance(BaseModel):
id: str
@@ -28,7 +28,7 @@ class Function(BaseModel):
name: str
type: Literal["Function"] = "Function"
description: str
parameters: Optional[List[str]]
parameters: Optional[List[str]] = None
return_type: str
is_static: Optional[bool] = False
@@ -38,7 +38,7 @@ class Variable(BaseModel):
type: Literal["Variable"] = "Variable"
description: str
is_static: Optional[bool] = False
default_value: Optional[str]
default_value: Optional[str] = None
class Operator(BaseModel):
id: str
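These `= None` defaults are needed because Pydantic v2 no longer treats `Optional[X]` as implicitly optional; without an explicit default the field becomes required. A minimal demonstration:

```python
from typing import List, Optional
from pydantic import BaseModel, ValidationError

class WithoutDefault(BaseModel):
    parameters: Optional[List[str]]  # required in Pydantic v2 despite Optional

class WithDefault(BaseModel):
    parameters: Optional[List[str]] = None  # genuinely optional, as in the fix above

try:
    WithoutDefault()
except ValidationError as error:
    print(error)          # "parameters: Field required"
print(WithDefault())      # parameters=None
```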


@@ -21,7 +21,6 @@ async def chunk_naive_llm_classifier(data_chunks: list[DocumentChunk], classific
for chunk_index, chunk in enumerate(data_chunks):
chunk_classification = chunk_classifications[chunk_index]
classification_data_points.append(uuid5(NAMESPACE_OID, chunk_classification.label.type))
classification_data_points.append(uuid5(NAMESPACE_OID, chunk_classification.label.type))
for classification_subclass in chunk_classification.label.subclass:
classification_data_points.append(uuid5(NAMESPACE_OID, classification_subclass.value))
@@ -39,7 +38,7 @@ async def chunk_naive_llm_classifier(data_chunks: list[DocumentChunk], classific
if await vector_engine.has_collection(collection_name):
existing_data_points = await vector_engine.retrieve(
collection_name,
list(set(classification_data_points)),
[str(classification_data) for classification_data in list(set(classification_data_points))],
) if len(classification_data_points) > 0 else []
existing_points_map = {point.id: True for point in existing_data_points}
@@ -60,7 +59,7 @@ async def chunk_naive_llm_classifier(data_chunks: list[DocumentChunk], classific
data_points.append(
DataPoint[Keyword](
id=str(classification_type_id),
payload=Keyword.parse_obj({
payload=Keyword.model_validate({
"uuid": str(classification_type_id),
"text": classification_type_label,
"chunk_id": str(data_chunk.chunk_id),
@@ -99,7 +98,7 @@ async def chunk_naive_llm_classifier(data_chunks: list[DocumentChunk], classific
data_points.append(
DataPoint[Keyword](
id=str(classification_subtype_id),
payload=Keyword.parse_obj({
payload=Keyword.model_validate({
"uuid": str(classification_subtype_id),
"text": classification_subtype_label,
"chunk_id": str(data_chunk.chunk_id),


@@ -56,7 +56,7 @@ class OntologyEngine:
for item in items:
flat_list.extend(await self.recursive_flatten(item, parent_id))
elif isinstance(items, dict):
model = NodeModel.parse_obj(items)
model = NodeModel.model_validate(items)
flat_list.append(await self.flatten_model(model, parent_id))
for child in model.children:
flat_list.extend(await self.recursive_flatten(child, model.node_id))
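`parse_obj` is the deprecated Pydantic v1 spelling; v2 uses `model_validate` for the same dict-to-model validation, as this and the following hunks adopt. A standalone sketch (the model is an illustrative stand-in, not the real `NodeModel`):

```python
from pydantic import BaseModel

class Node(BaseModel):  # illustrative stand-in for NodeModel
    node_id: str
    name: str

node = Node.model_validate({"node_id": "n1", "name": "root"})  # v2 replacement for parse_obj
print(node.name)  # root
```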


@@ -12,7 +12,7 @@ class NodeModel(BaseModel):
default_relationship: Optional[RelationshipModel] = None
children: List[Union[Dict[str, Any], "NodeModel"]] = Field(default_factory=list)
NodeModel.update_forward_refs()
NodeModel.model_rebuild()
class OntologyNode(BaseModel):


@@ -11,7 +11,7 @@ async def save_chunks_to_store(data_chunks: list[DocumentChunk], collection_name
# Remove and unlink existing chunks
if await vector_engine.has_collection(collection_name):
existing_chunks = [DocumentChunk.parse_obj(chunk.payload) for chunk in (await vector_engine.retrieve(
existing_chunks = [DocumentChunk.model_validate(chunk.payload) for chunk in (await vector_engine.retrieve(
collection_name,
[str(chunk.chunk_id) for chunk in data_chunks],
))]


@@ -49,7 +49,7 @@ async def main():
search_results = await cognee.search(SearchType.SUMMARIES, query = random_node_name)
assert len(search_results) != 0, "Query related summaries don't exist."
print("\n\Extracted summaries are:\n")
print("\nExtracted summaries are:\n")
for result in search_results:
print(f"{result}\n")


@@ -53,7 +53,7 @@ async def main():
search_results = await cognee.search(SearchType.SUMMARIES, query = random_node_name)
assert len(search_results) != 0, "Query related summaries don't exist."
print("\n\Extracted summaries are:\n")
print("\nExtracted summaries are:\n")
for result in search_results:
print(f"{result}\n")


@@ -54,7 +54,7 @@ async def main():
search_results = await cognee.search(SearchType.SUMMARIES, query = random_node_name)
assert len(search_results) != 0, "Query related summaries don't exist."
print("\n\Extracted summaries are:\n")
print("\nExtracted summaries are:\n")
for result in search_results:
print(f"{result}\n")


@@ -52,7 +52,7 @@ async def main():
search_results = await cognee.search(SearchType.SUMMARIES, query = random_node_name)
assert len(search_results) != 0, "Query related summaries don't exist."
print("\n\Extracted summaries are:\n")
print("\nExtracted summaries are:\n")
for result in search_results:
print(f"{result}\n")


@@ -14,9 +14,11 @@ Check available configuration options:
from cognee.infrastructure.databases.vector import get_vectordb_config
from cognee.infrastructure.databases.graph.config import get_graph_config
from cognee.infrastructure.databases.relational import get_relational_config
from cognee.infrastructure.llm.config import get_llm_config
print(get_vectordb_config().to_dict())
print(get_graph_config().to_dict())
print(get_relational_config().to_dict())
print(get_llm_config().to_dict())
```
@@ -29,8 +31,7 @@ GRAPH_DATABASE_PROVIDER = 'lancedb'
Otherwise, you can set the configuration yourself:
```python
cognee.config.llm_provider = 'ollama'
cognee.config.set_llm_provider('ollama')
```
## 🚀 Getting Started with Local Models
@@ -52,15 +53,14 @@ LLM_PROVIDER = 'ollama'
Otherwise, you can set the configuration for the model:
```python
cognee.config.llm_provider = 'ollama'
cognee.config.set_llm_provider('ollama')
```
You can also set the HOST and model name:
```python
cognee.config.llm_endpoint = "http://localhost:11434/v1"
cognee.config.llm_model = "mistral:instruct"
cognee.config.set_llm_endpoint("http://localhost:11434/v1")
cognee.config.set_llm_model("mistral:instruct")
```
@@ -73,7 +73,7 @@ LLM_PROVIDER = 'custom'
Otherwise, you can set the configuration for the model:
```python
cognee.config.llm_provider = 'custom'
cognee.config.set_llm_provider('custom')
```
You can also set the HOST and model name:


@@ -0,0 +1,39 @@
import cognee
import asyncio
from cognee.api.v1.search import SearchType
# Prerequisites:
# 1. Copy `.env.template` and rename it to `.env`.
# 2. Add your OpenAI API key to the `.env` file in the `LLM_API_KEY` field:
# LLM_API_KEY = "your_key_here"
# 3. (Optional) To minimize setup effort, set `VECTOR_DB_PROVIDER="lancedb"` in `.env`.
async def main():
# Create a clean slate for cognee -- reset data and system state
await cognee.prune.prune_data()
await cognee.prune.prune_system(metadata=True)
# cognee knowledge graph will be created based on this text
text = """
Natural language processing (NLP) is an interdisciplinary
subfield of computer science and information retrieval.
"""
# Add the text, and make it available for cognify
await cognee.add(text)
# Use LLMs and cognee to create knowledge graph
await cognee.cognify()
# Query cognee for insights on the added text
search_results = await cognee.search(
SearchType.INSIGHTS,
{'query': 'Tell me about NLP'}
)
# Display search results
for result_text in search_results:
print(result_text)
if __name__ == '__main__':
asyncio.run(main())

log.txt (new, empty file)


@@ -537,10 +537,14 @@
"import os\n",
"\n",
"# # Setting environment variables\n",
"os.environ[\"GRAPHISTRY_USERNAME\"] = \"\"\n",
"os.environ[\"GRAPHISTRY_PASSWORD\"] = \"\"\n",
"if \"GRAPHISTRY_USERNAME\" not in os.environ: \n",
" os.environ[\"GRAPHISTRY_USERNAME\"] = \"\"\n",
"\n",
"os.environ[\"LLM_API_KEY\"] = \"\"\n",
"if \"GRAPHISTRY_PASSWORD\" not in os.environ: \n",
" os.environ[\"GRAPHISTRY_PASSWORD\"] = \"\"\n",
"\n",
"if \"LLM_API_KEY\" not in os.environ:\n",
" os.environ[\"LLM_API_KEY\"] = \"\"\n",
"\n",
"os.environ[\"GRAPH_DATABASE_PROVIDER\"]=\"networkx\" # \"neo4j\" or \"networkx\"\n",
"# Not needed if using networkx\n",
@@ -577,6 +581,7 @@
"\n",
"import cognee\n",
"\n",
"await cognee.prune.prune_data()\n",
"await cognee.prune.prune_system(metadata=True)"
]
},
@@ -639,7 +644,8 @@
" chunks_into_graph, \\\n",
" source_documents_to_chunks, \\\n",
" check_permissions_on_documents, \\\n",
" classify_documents\n",
" classify_documents, \\\n",
" chunk_naive_llm_classifier\n",
"from cognee.tasks.summarization import summarize_text\n",
"\n",
"async def run_cognify_pipeline(dataset: Dataset, user: User = None):\n",
@@ -667,6 +673,10 @@
" summarization_model = cognee_config.summarization_model,\n",
" collection_name = \"summaries\",\n",
" ),\n",
" Task(\n",
" chunk_naive_llm_classifier,\n",
" classification_model = cognee_config.classification_model,\n",
" ),\n",
" Task(chunk_remove_disconnected), # Remove the obsolete document chunks.\n",
" ]\n",
"\n",
@@ -876,7 +886,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "cognee-bGi0WgSG-py3.9",
"display_name": ".venv",
"language": "python",
"name": "python3"
},
@@ -890,7 +900,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.5"
"version": "3.9.6"
}
},
"nbformat": 4,

poetry.lock (generated, 2,548 changed lines): file diff suppressed because it is too large.


@@ -19,53 +19,51 @@ classifiers = [
[tool.poetry.dependencies]
python = ">=3.9.0,<3.12"
openai = "1.27.0"
openai = "1.52.0"
pydantic = "2.8.2"
python-dotenv = "1.0.1"
fastapi = "^0.109.2"
uvicorn = "0.22.0"
requests = "2.32.3"
aiohttp = "3.10.10"
typing_extensions = "4.12.2"
dspy = "2.5.25"
nest_asyncio = "1.6.0"
numpy = "1.26.4"
datasets = "3.1.0"
falkordb = "1.0.9"
boto3 = "^1.26.125"
botocore="^1.35.54"
gunicorn = "^20.1.0"
sqlalchemy = "2.0.35"
instructor = "1.3.5"
instructor = "1.6.3"
networkx = "^3.2.1"
debugpy = "1.8.2"
pyarrow = "15.0.0"
pylint = "^3.0.3"
aiosqlite = "^0.20.0"
pandas = "2.0.3"
greenlet = "^3.0.3"
ruff = "^0.2.2"
filetype = "^1.2.0"
nltk = "^3.8.1"
dlt = {extras = ["sqlalchemy"], version = "^1.2.0"}
overrides = "^7.7.0"
aiofiles = "^23.2.1"
qdrant-client = "^1.9.0"
graphistry = "^0.33.5"
tenacity = "^8.2.3"
tenacity = "^9.0.0"
weaviate-client = "4.6.7"
scikit-learn = "^1.5.0"
fastembed = "0.2.7"
pypdf = "^4.1.0"
neo4j = "^5.20.0"
jinja2 = "^3.1.3"
matplotlib = "^3.8.3"
structlog = "^24.1.0"
tiktoken = "0.7.0"
langchain_text_splitters = "0.3.2"
langsmith = "0.1.139"
langdetect = "1.0.9"
posthog = "^3.5.0"
lancedb = "0.8.0"
litellm = "1.38.10"
litellm = "1.49.1"
groq = "0.8.0"
tantivy = "^0.22.0"
tokenizers ="0.15.2"
transformers ="4.39.0"
python-multipart = "^0.0.9"
langfuse = "^2.32.0"
protobuf = "<5.0.0"
pydantic-settings = "^2.2.1"
anthropic = "^0.26.1"
pdfplumber = "^0.11.1"
sentry-sdk = {extras = ["fastapi"], version = "^2.9.0"}
fastapi-users = { version = "*", extras = ["sqlalchemy"] }
asyncpg = "^0.29.0"
@@ -88,6 +86,11 @@ pytest-asyncio = "^0.21.1"
coverage = "^7.3.2"
mypy = "^1.7.1"
notebook = "^7.1.1"
deptry = "^0.20.0"
debugpy = "1.8.2"
pylint = "^3.0.3"
ruff = "^0.2.2"
tweepy = "4.14.0"
[tool.poetry.group.docs.dependencies]
mkdocs-material = "^9.5.42"