Merge remote-tracking branch 'origin/main' into feat/COG-184-add-falkordb

Boris Arzentar 2024-11-07 11:29:09 +01:00
commit 758698a35b
49 changed files with 2109 additions and 1674 deletions

.github/workflows/auto-comment.yml (new file)
View file

@ -0,0 +1,81 @@
name: Issue and PR Auto Comments
on:
issues:
types:
- opened
- closed
- assigned
pull_request_target:
types:
- opened
- closed
permissions:
contents: read
jobs:
auto-comment:
permissions:
issues: write
pull-requests: write
runs-on: ubuntu-latest
steps:
# configuration for auto-comment actions
- name: Configure Auto Comments
uses: wow-actions/auto-comment@v1
with:
GITHUB_TOKEN: ${{ secrets.GH_TOKEN }}
issuesOpened: |
👋 @{{ author }}
Thank you for raising an issue. We will investigate the matter and get back to you as soon as possible.
To help us address your issue efficiently, please ensure you have provided:
- A clear description of the problem
- Steps to reproduce (if applicable)
- Expected vs actual behavior
- Any relevant screenshots or error messages
Our team typically responds within 2-3 business days.
issuesClosed: |
✅ @{{ author }}
This issue has been closed. If you have any further questions or if the issue resurfaces,
please feel free to:
- Add a comment to this thread
- Open a new issue with reference to this one
Thank you for helping us improve!
pullRequestOpened: |
👍 @{{ author }}
Thank you for your pull request and for contributing to our community!
Please ensure you have:
- [ ] Followed our contributing guidelines
- [ ] Added/updated tests (if applicable)
- [ ] Updated documentation (if applicable)
- [ ] Added a descriptive PR title
Our team will review your contribution as soon as possible. Feel free to reach out if you need any assistance.
# Separate action for merged PRs
- name: Handle Merged Pull Requests
if: github.event.pull_request.merged == true
uses: actions-cool/pr-welcome@v1.2.1
with:
token: ${{ secrets.GH_TOKEN }}
comment: |
🎉 Fantastic work @${{ github.event.pull_request.user.login }}! 🎉
Your pull request has been merged successfully. Thank you for your valuable contribution!
We appreciate the time and effort you've put into improving our project.
Your changes will be included in our next release.
Keep up the great work! 💪
emoji: 'rocket'
pr-emoji: '+1, heart, rocket'

View file

@ -20,10 +20,16 @@ jobs:
uses: docker/setup-buildx-action@v3
- name: Build Docker images
env:
ENVIRONMENT: dev
ENV: dev
run: |
docker compose -f docker-compose.yml build
- name: Run Docker Compose
env:
ENVIRONMENT: dev
ENV: dev
run: |
docker compose -f docker-compose.yml up -d

View file

@ -25,7 +25,7 @@ RUN pip install poetry
RUN poetry config virtualenvs.create false
# Install the dependencies
RUN poetry install --no-root --no-dev
RUN poetry install --all-extras --no-root --no-dev
# Set the PYTHONPATH environment variable to include the /app directory

View file

@ -29,6 +29,10 @@ If you have questions, join our <a href="https://discord.gg/NQPKmU5CCg">Discord
pip install cognee
```
### With pip with PostgreSQL support
```bash
pip install cognee[postgres]
```
### With poetry
@ -36,6 +40,11 @@ pip install cognee
poetry add cognee
```
### With poetry with PostgreSQL support
```bash
poetry add cognee -E postgres
```
## 💻 Basic Usage
@ -50,7 +59,7 @@ os.environ["LLM_API_KEY"] = "YOUR OPENAI_API_KEY"
or
```
import cognee
cognee.config.llm_api_key = "YOUR_OPENAI_API_KEY"
cognee.config.set_llm_api_key("YOUR_OPENAI_API_KEY")
```
You can also set the variables by creating a .env file; here is our <a href="https://github.com/topoteretes/cognee/blob/main/.env.template">template.</a>
To use different LLM providers, check out our <a href="https://topoteretes.github.io/cognee">documentation</a> for more info.
@ -73,26 +82,54 @@ docker-compose up
```
Then navigate to localhost:3000
If you want to use the UI with PostgreSQL through docker-compose, make sure to set the following values in the .env file:
```
DB_PROVIDER=postgres
DB_HOST=postgres
DB_PORT=5432
DB_NAME=cognee_db
DB_USERNAME=cognee
DB_PASSWORD=cognee
```
### Simple example
Run the default cognee pipeline:
First, copy `.env.template` to `.env` and add your OpenAI API key to the LLM_API_KEY field.
Optionally, set `VECTOR_DB_PROVIDER="lancedb"` in `.env` to simplify setup.
This script will run the default pipeline:
```python
import cognee
import asyncio
from cognee.api.v1.search import SearchType
text = """Natural language processing (NLP) is an interdisciplinary
subfield of computer science and information retrieval"""
async def main():
await cognee.prune.prune_data() # Reset cognee data
await cognee.prune.prune_system(metadata=True) # Reset cognee system state
await cognee.add(text) # Add a new piece of information
text = """
Natural language processing (NLP) is an interdisciplinary
subfield of computer science and information retrieval.
"""
await cognee.cognify() # Use LLMs and cognee to create a knowledge graph
await cognee.add(text) # Add text to cognee
await cognee.cognify() # Use LLMs and cognee to create knowledge graph
search_results = await cognee.search("INSIGHTS", {'query': 'NLP'}) # Query cognee for the insights
search_results = await cognee.search( # Search cognee for insights
SearchType.INSIGHTS,
{'query': 'Tell me about NLP'}
)
for result in search_results:
do_something_with_result(result)
for result_text in search_results: # Display results
print(result_text)
asyncio.run(main())
```
A version of this example is here: `examples/python/simple_example.py`
### Create your own memory store

View file

@ -1,24 +1,11 @@
""" FastAPI server for the Cognee API. """
from datetime import datetime
import os
from uuid import UUID
import aiohttp
import uvicorn
import logging
import sentry_sdk
from typing import List, Union, Optional, Literal
from typing_extensions import Annotated
from fastapi import FastAPI, HTTPException, Form, UploadFile, Query, Depends
from fastapi.responses import JSONResponse, FileResponse, Response
from fastapi import FastAPI
from fastapi.responses import JSONResponse, Response
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from cognee.api.DTO import InDTO, OutDTO
from cognee.api.v1.search import SearchType
from cognee.modules.users.models import User
from cognee.modules.users.methods import get_authenticated_user
from cognee.modules.pipelines.models import PipelineRunStatus
# Set up logging
logging.basicConfig(
@ -65,9 +52,12 @@ app.add_middleware(
from cognee.api.v1.users.routers import get_auth_router, get_register_router,\
get_reset_password_router, get_verify_router, get_users_router
from cognee.api.v1.permissions.get_permissions_router import get_permissions_router
from cognee.api.v1.permissions.routers import get_permissions_router
from cognee.api.v1.settings.routers import get_settings_router
from cognee.api.v1.datasets.routers import get_datasets_router
from cognee.api.v1.cognify.routers import get_cognify_router
from cognee.api.v1.search.routers import get_search_router
from cognee.api.v1.add.routers import get_add_router
from fastapi import Request
from fastapi.encoders import jsonable_encoder
@ -137,261 +127,35 @@ def health_check():
"""
return Response(status_code = 200)
app.include_router(
get_datasets_router(),
prefix="/api/v1/datasets",
tags=["datasets"]
)
class ErrorResponseDTO(BaseModel):
message: str
app.include_router(
get_add_router(),
prefix="/api/v1/add",
tags=["add"]
)
app.include_router(
get_cognify_router(),
prefix="/api/v1/cognify",
tags=["cognify"]
)
class DatasetDTO(OutDTO):
id: UUID
name: str
created_at: datetime
updated_at: Optional[datetime]
owner_id: UUID
@app.get("/api/v1/datasets", response_model = list[DatasetDTO])
async def get_datasets(user: User = Depends(get_authenticated_user)):
try:
from cognee.modules.data.methods import get_datasets
datasets = await get_datasets(user.id)
return datasets
except Exception as error:
logger.error(f"Error retrieving datasets: {str(error)}")
raise HTTPException(status_code = 500, detail = f"Error retrieving datasets: {str(error)}") from error
@app.delete("/api/v1/datasets/{dataset_id}", response_model = None, responses = { 404: { "model": ErrorResponseDTO }})
async def delete_dataset(dataset_id: str, user: User = Depends(get_authenticated_user)):
from cognee.modules.data.methods import get_dataset, delete_dataset
dataset = await get_dataset(user.id, dataset_id)
if dataset is None:
raise HTTPException(
status_code = 404,
detail = f"Dataset ({dataset_id}) not found."
)
await delete_dataset(dataset)
@app.get("/api/v1/datasets/{dataset_id}/graph", response_model = str)
async def get_dataset_graph(dataset_id: str, user: User = Depends(get_authenticated_user)):
from cognee.shared.utils import render_graph
from cognee.infrastructure.databases.graph import get_graph_engine
try:
graph_client = await get_graph_engine()
graph_url = await render_graph(graph_client.graph)
return JSONResponse(
status_code = 200,
content = str(graph_url),
)
except:
return JSONResponse(
status_code = 409,
content = "Graphistry credentials are not set. Please set them in your .env file.",
)
class DataDTO(OutDTO):
id: UUID
name: str
created_at: datetime
updated_at: Optional[datetime]
extension: str
mime_type: str
raw_data_location: str
@app.get("/api/v1/datasets/{dataset_id}/data", response_model = list[DataDTO], responses = { 404: { "model": ErrorResponseDTO }})
async def get_dataset_data(dataset_id: str, user: User = Depends(get_authenticated_user)):
from cognee.modules.data.methods import get_dataset_data, get_dataset
dataset = await get_dataset(user.id, dataset_id)
if dataset is None:
return JSONResponse(
status_code = 404,
content = ErrorResponseDTO(f"Dataset ({dataset_id}) not found."),
)
dataset_data = await get_dataset_data(dataset_id = dataset.id)
if dataset_data is None:
return []
return dataset_data
@app.get("/api/v1/datasets/status", response_model = dict[str, PipelineRunStatus])
async def get_dataset_status(datasets: Annotated[List[str], Query(alias="dataset")] = None, user: User = Depends(get_authenticated_user)):
from cognee.api.v1.datasets.datasets import datasets as cognee_datasets
try:
datasets_statuses = await cognee_datasets.get_status(datasets)
return datasets_statuses
except Exception as error:
return JSONResponse(
status_code = 409,
content = {"error": str(error)}
)
@app.get("/api/v1/datasets/{dataset_id}/data/{data_id}/raw", response_class = FileResponse)
async def get_raw_data(dataset_id: str, data_id: str, user: User = Depends(get_authenticated_user)):
from cognee.modules.data.methods import get_dataset, get_dataset_data
dataset = await get_dataset(user.id, dataset_id)
if dataset is None:
return JSONResponse(
status_code = 404,
content = {
"detail": f"Dataset ({dataset_id}) not found."
}
)
dataset_data = await get_dataset_data(dataset.id)
if dataset_data is None:
raise HTTPException(status_code = 404, detail = f"Dataset ({dataset_id}) not found.")
data = [data for data in dataset_data if str(data.id) == data_id][0]
if data is None:
return JSONResponse(
status_code = 404,
content = {
"detail": f"Data ({data_id}) not found in dataset ({dataset_id})."
}
)
return data.raw_data_location
@app.post("/api/v1/add", response_model = None)
async def add(
data: List[UploadFile],
datasetId: str = Form(...),
user: User = Depends(get_authenticated_user),
):
""" This endpoint is responsible for adding data to the graph."""
from cognee.api.v1.add import add as cognee_add
try:
if isinstance(data, str) and data.startswith("http"):
if "github" in data:
# Perform git clone if the URL is from GitHub
repo_name = data.split("/")[-1].replace(".git", "")
os.system(f"git clone {data} .data/{repo_name}")
await cognee_add(
"data://.data/",
f"{repo_name}",
)
else:
# Fetch and store the data from other types of URL using curl
async with aiohttp.ClientSession() as session:
async with session.get(data) as resp:
if resp.status == 200:
file_data = await resp.read()
with open(f".data/{data.split('/')[-1]}", "wb") as f:
f.write(file_data)
await cognee_add(
"data://.data/",
f"{data.split('/')[-1]}",
)
else:
await cognee_add(
data,
datasetId,
user = user,
)
except Exception as error:
return JSONResponse(
status_code = 409,
content = {"error": str(error)}
)
class CognifyPayloadDTO(BaseModel):
datasets: List[str]
@app.post("/api/v1/cognify", response_model = None)
async def cognify(payload: CognifyPayloadDTO, user: User = Depends(get_authenticated_user)):
""" This endpoint is responsible for the cognitive processing of the content."""
from cognee.api.v1.cognify.cognify_v2 import cognify as cognee_cognify
try:
await cognee_cognify(payload.datasets, user)
except Exception as error:
return JSONResponse(
status_code = 409,
content = {"error": str(error)}
)
class SearchPayloadDTO(InDTO):
search_type: SearchType
query: str
@app.post("/api/v1/search", response_model = list)
async def search(payload: SearchPayloadDTO, user: User = Depends(get_authenticated_user)):
""" This endpoint is responsible for searching for nodes in the graph."""
from cognee.api.v1.search import search as cognee_search
try:
results = await cognee_search(payload.search_type, payload.query, user)
return results
except Exception as error:
return JSONResponse(
status_code = 409,
content = {"error": str(error)}
)
from cognee.modules.settings.get_settings import LLMConfig, VectorDBConfig
class LLMConfigDTO(OutDTO, LLMConfig):
pass
class VectorDBConfigDTO(OutDTO, VectorDBConfig):
pass
class SettingsDTO(OutDTO):
llm: LLMConfigDTO
vector_db: VectorDBConfigDTO
@app.get("/api/v1/settings", response_model = SettingsDTO)
async def get_settings(user: User = Depends(get_authenticated_user)):
from cognee.modules.settings import get_settings as get_cognee_settings
return get_cognee_settings()
class LLMConfigDTO(InDTO):
provider: Union[Literal["openai"], Literal["ollama"], Literal["anthropic"]]
model: str
api_key: str
class VectorDBConfigDTO(InDTO):
provider: Union[Literal["lancedb"], Literal["qdrant"], Literal["weaviate"], Literal["pgvector"]]
url: str
api_key: str
class SettingsPayloadDTO(InDTO):
llm: Optional[LLMConfigDTO] = None
vector_db: Optional[VectorDBConfigDTO] = None
@app.post("/api/v1/settings", response_model = None)
async def save_settings(new_settings: SettingsPayloadDTO, user: User = Depends(get_authenticated_user)):
from cognee.modules.settings import save_llm_config, save_vector_db_config
if new_settings.llm is not None:
await save_llm_config(new_settings.llm)
if new_settings.vector_db is not None:
await save_vector_db_config(new_settings.vector_db)
app.include_router(
get_search_router(),
prefix="/api/v1/search",
tags=["search"]
)
app.include_router(
get_settings_router(),
prefix="/api/v1/settings",
tags=["settings"]
)
def start_api_server(host: str = "0.0.0.0", port: int = 8000):
"""

View file

@ -0,0 +1 @@
from .get_add_router import get_add_router

View file

@ -0,0 +1,60 @@
from fastapi import Form, UploadFile, Depends
from fastapi.responses import JSONResponse
from fastapi import APIRouter
from typing import List
import aiohttp
import subprocess
import logging
import os
from cognee.modules.users.models import User
from cognee.modules.users.methods import get_authenticated_user
logger = logging.getLogger(__name__)
def get_add_router() -> APIRouter:
router = APIRouter()
@router.post("/", response_model=None)
async def add(
data: List[UploadFile],
datasetId: str = Form(...),
user: User = Depends(get_authenticated_user),
):
""" This endpoint is responsible for adding data to the graph."""
from cognee.api.v1.add import add as cognee_add
try:
if isinstance(data, str) and data.startswith("http"):
if "github" in data:
# Perform git clone if the URL is from GitHub
repo_name = data.split("/")[-1].replace(".git", "")
subprocess.run(["git", "clone", data, f".data/{repo_name}"], check=True)
await cognee_add(
"data://.data/",
f"{repo_name}",
)
else:
# Fetch and store the data from other types of URL using curl
async with aiohttp.ClientSession() as session:
async with session.get(data) as resp:
if resp.status == 200:
file_data = await resp.read()
filename = os.path.basename(data)
with open(f".data/{filename}", "wb") as f:
f.write(file_data)
await cognee_add(
"data://.data/",
f"{data.split('/')[-1]}",
)
else:
await cognee_add(
data,
datasetId,
user=user,
)
except Exception as error:
return JSONResponse(
status_code=409,
content={"error": str(error)}
)
return router
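For orientation, a client could exercise this endpoint roughly as follows. This is a hedged sketch: the `/api/v1/add` prefix comes from the `include_router` call earlier in this diff, while the host, port, and file name are hypothetical, and authentication (fastapi-users) is omitted for brevity.
```python
import requests  # any HTTP client works; requests is used here only for illustration

# Hypothetical local deployment; authentication is required in practice and omitted here.
with open("notes.txt", "rb") as f:
    resp = requests.post(
        "http://localhost:8000/api/v1/add",
        files=[("data", ("notes.txt", f, "text/plain"))],  # field name matches the List[UploadFile] parameter "data"
        data={"datasetId": "my-dataset"},                   # matches the Form(...) parameter
    )
print(resp.status_code)
```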

View file

@ -0,0 +1 @@
from .get_cognify_router import get_cognify_router

View file

@ -0,0 +1,27 @@
from fastapi import APIRouter
from typing import List
from pydantic import BaseModel
from cognee.modules.users.models import User
from fastapi.responses import JSONResponse
from cognee.modules.users.methods import get_authenticated_user
from fastapi import Depends
class CognifyPayloadDTO(BaseModel):
datasets: List[str]
def get_cognify_router() -> APIRouter:
router = APIRouter()
@router.post("/", response_model=None)
async def cognify(payload: CognifyPayloadDTO, user: User = Depends(get_authenticated_user)):
""" This endpoint is responsible for the cognitive processing of the content."""
from cognee.api.v1.cognify.cognify_v2 import cognify as cognee_cognify
try:
await cognee_cognify(payload.datasets, user)
except Exception as error:
return JSONResponse(
status_code=409,
content={"error": str(error)}
)
return router

View file

@ -5,6 +5,7 @@ from cognee.modules.cognify.config import get_cognify_config
from cognee.infrastructure.data.chunking.config import get_chunk_config
from cognee.infrastructure.databases.vector import get_vectordb_config
from cognee.infrastructure.databases.graph.config import get_graph_config
from cognee.infrastructure.llm.config import get_llm_config
from cognee.infrastructure.databases.relational import get_relational_config
from cognee.infrastructure.files.storage import LocalStorage
@ -55,19 +56,36 @@ class config():
graph_config.graph_database_provider = graph_database_provider
@staticmethod
def llm_provider(llm_provider: str):
graph_config = get_graph_config()
graph_config.llm_provider = llm_provider
def set_llm_provider(llm_provider: str):
llm_config = get_llm_config()
llm_config.llm_provider = llm_provider
@staticmethod
def llm_endpoint(llm_endpoint: str):
graph_config = get_graph_config()
graph_config.llm_endpoint = llm_endpoint
def set_llm_endpoint(llm_endpoint: str):
llm_config = get_llm_config()
llm_config.llm_endpoint = llm_endpoint
@staticmethod
def llm_model(llm_model: str):
graph_config = get_graph_config()
graph_config.llm_model = llm_model
def set_llm_model(llm_model: str):
llm_config = get_llm_config()
llm_config.llm_model = llm_model
@staticmethod
def set_llm_api_key(llm_api_key: str):
llm_config = get_llm_config()
llm_config.llm_api_key = llm_api_key
@staticmethod
def set_llm_config(config_dict: dict):
"""
Updates the llm config with values from config_dict.
"""
llm_config = get_llm_config()
for key, value in config_dict.items():
if hasattr(llm_config, key):
object.__setattr__(llm_config, key, value)
else:
raise AttributeError(f"'{key}' is not a valid attribute of the config.")
@staticmethod
def set_chunk_strategy(chunk_strategy: object):
@ -137,5 +155,5 @@ class config():
if "username" not in graphistry_config or "password" not in graphistry_config:
raise ValueError("graphistry_config dictionary must contain 'username' and 'password' keys.")
base_config.graphistry_username = graphistry_config.username
base_config.graphistry_password = graphistry_config.password
base_config.graphistry_username = graphistry_config.get("username")
base_config.graphistry_password = graphistry_config.get("password")
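As a quick illustration of the setter-based API introduced above (a minimal sketch; the setter names and config keys are taken from this diff, while the concrete provider and model values are placeholders):
```python
import cognee

# Individual setters added in this diff
cognee.config.set_llm_provider("openai")
cognee.config.set_llm_model("gpt-4o-mini")            # placeholder model name
cognee.config.set_llm_api_key("YOUR_OPENAI_API_KEY")

# Bulk update; per set_llm_config above, unknown keys raise AttributeError
cognee.config.set_llm_config({
    "llm_provider": "ollama",
    "llm_endpoint": "http://localhost:11434/v1",
    "llm_model": "mistral:instruct",
})
```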

View file

@ -0,0 +1 @@
from .get_datasets_router import get_datasets_router

View file

@ -0,0 +1,178 @@
import logging
from fastapi import APIRouter
from datetime import datetime
from uuid import UUID
from typing import List, Optional
from typing_extensions import Annotated
from fastapi import HTTPException, Query, Depends
from fastapi.responses import JSONResponse, FileResponse
from pydantic import BaseModel
from cognee.api.DTO import OutDTO
from cognee.modules.users.models import User
from cognee.modules.users.methods import get_authenticated_user
from cognee.modules.pipelines.models import PipelineRunStatus
logger = logging.getLogger(__name__)
class ErrorResponseDTO(BaseModel):
message: str
class DatasetDTO(OutDTO):
id: UUID
name: str
created_at: datetime
updated_at: Optional[datetime] = None
owner_id: UUID
class DataDTO(OutDTO):
id: UUID
name: str
created_at: datetime
updated_at: Optional[datetime] = None
extension: str
mime_type: str
raw_data_location: str
def get_datasets_router() -> APIRouter:
router = APIRouter()
@router.get("/", response_model=list[DatasetDTO])
async def get_datasets(user: User = Depends(get_authenticated_user)):
try:
from cognee.modules.data.methods import get_datasets
datasets = await get_datasets(user.id)
return datasets
except Exception as error:
logger.error(f"Error retrieving datasets: {str(error)}")
raise HTTPException(status_code=500, detail=f"Error retrieving datasets: {str(error)}") from error
@router.delete("/{dataset_id}", response_model=None, responses={404: {"model": ErrorResponseDTO}})
async def delete_dataset(dataset_id: str, user: User = Depends(get_authenticated_user)):
from cognee.modules.data.methods import get_dataset, delete_dataset
dataset = await get_dataset(user.id, dataset_id)
if dataset is None:
raise HTTPException(
status_code=404,
detail=f"Dataset ({dataset_id}) not found."
)
await delete_dataset(dataset)
@router.delete("/{dataset_id}/data/{data_id}", response_model=None, responses={404: {"model": ErrorResponseDTO}})
async def delete_data(dataset_id: str, data_id: str, user: User = Depends(get_authenticated_user)):
from cognee.modules.data.methods import get_data, delete_data
from cognee.modules.data.methods import get_dataset
# Check if user has permission to access dataset and data by trying to get the dataset
dataset = await get_dataset(user.id, dataset_id)
#TODO: Handle situation differently if user doesn't have permission to access data?
if dataset is None:
raise HTTPException(
status_code=404,
detail=f"Dataset ({dataset_id}) not found."
)
data = await get_data(data_id)
if data is None:
raise HTTPException(
status_code=404,
detail=f"Dataset ({data_id}) not found."
)
await delete_data(data)
@router.get("/{dataset_id}/graph", response_model=str)
async def get_dataset_graph(dataset_id: str, user: User = Depends(get_authenticated_user)):
from cognee.shared.utils import render_graph
from cognee.infrastructure.databases.graph import get_graph_engine
try:
graph_client = await get_graph_engine()
graph_url = await render_graph(graph_client.graph)
return JSONResponse(
status_code=200,
content=str(graph_url),
)
except:
return JSONResponse(
status_code=409,
content="Graphistry credentials are not set. Please set them in your .env file.",
)
@router.get("/{dataset_id}/data", response_model=list[DataDTO],
responses={404: {"model": ErrorResponseDTO}})
async def get_dataset_data(dataset_id: str, user: User = Depends(get_authenticated_user)):
from cognee.modules.data.methods import get_dataset_data, get_dataset
dataset = await get_dataset(user.id, dataset_id)
if dataset is None:
return JSONResponse(
status_code=404,
content=ErrorResponseDTO(f"Dataset ({dataset_id}) not found."),
)
dataset_data = await get_dataset_data(dataset_id=dataset.id)
if dataset_data is None:
return []
return dataset_data
@router.get("/status", response_model=dict[str, PipelineRunStatus])
async def get_dataset_status(datasets: Annotated[List[str], Query(alias="dataset")] = None,
user: User = Depends(get_authenticated_user)):
from cognee.api.v1.datasets.datasets import datasets as cognee_datasets
try:
datasets_statuses = await cognee_datasets.get_status(datasets)
return datasets_statuses
except Exception as error:
return JSONResponse(
status_code=409,
content={"error": str(error)}
)
@router.get("/{dataset_id}/data/{data_id}/raw", response_class=FileResponse)
async def get_raw_data(dataset_id: str, data_id: str, user: User = Depends(get_authenticated_user)):
from cognee.modules.data.methods import get_dataset, get_dataset_data
dataset = await get_dataset(user.id, dataset_id)
if dataset is None:
return JSONResponse(
status_code=404,
content={
"detail": f"Dataset ({dataset_id}) not found."
}
)
dataset_data = await get_dataset_data(dataset.id)
if dataset_data is None:
raise HTTPException(status_code=404, detail=f"No data found in dataset ({dataset_id}).")
matching_data = [data for data in dataset_data if str(data.id) == data_id]
# Check if matching_data contains an element
if len(matching_data) == 0:
return JSONResponse(
status_code=404,
content={
"detail": f"Data ({data_id}) not found in dataset ({dataset_id})."
}
)
data = matching_data[0]
return data.raw_data_location
return router

View file

@ -0,0 +1 @@
from .get_permissions_router import get_permissions_router

View file

@ -0,0 +1 @@
from .get_search_router import get_search_router

View file

@ -0,0 +1,31 @@
from cognee.api.v1.search import SearchType
from fastapi.responses import JSONResponse
from cognee.modules.users.models import User
from fastapi import Depends, APIRouter
from cognee.api.DTO import InDTO
from cognee.modules.users.methods import get_authenticated_user
class SearchPayloadDTO(InDTO):
search_type: SearchType
query: str
def get_search_router() -> APIRouter:
router = APIRouter()
@router.post("/", response_model = list)
async def search(payload: SearchPayloadDTO, user: User = Depends(get_authenticated_user)):
""" This endpoint is responsible for searching for nodes in the graph."""
from cognee.api.v1.search import search as cognee_search
try:
results = await cognee_search(payload.search_type, payload.query, user)
return results
except Exception as error:
return JSONResponse(
status_code = 409,
content = {"error": str(error)}
)
return router

View file

@ -0,0 +1 @@
from .get_settings_router import get_settings_router

View file

@ -0,0 +1,51 @@
from fastapi import APIRouter
from cognee.api.DTO import InDTO, OutDTO
from typing import Union, Optional, Literal
from cognee.modules.users.methods import get_authenticated_user
from fastapi import Depends
from cognee.modules.users.models import User
from cognee.modules.settings.get_settings import LLMConfig, VectorDBConfig
class LLMConfigOutputDTO(OutDTO, LLMConfig):
pass
class VectorDBConfigOutputDTO(OutDTO, VectorDBConfig):
pass
class SettingsDTO(OutDTO):
llm: LLMConfigOutputDTO
vector_db: VectorDBConfigOutputDTO
class LLMConfigInputDTO(InDTO):
provider: Union[Literal["openai"], Literal["ollama"], Literal["anthropic"]]
model: str
api_key: str
class VectorDBConfigInputDTO(InDTO):
provider: Union[Literal["lancedb"], Literal["qdrant"], Literal["weaviate"], Literal["pgvector"]]
url: str
api_key: str
class SettingsPayloadDTO(InDTO):
llm: Optional[LLMConfigInputDTO] = None
vector_db: Optional[VectorDBConfigInputDTO] = None
def get_settings_router() -> APIRouter:
router = APIRouter()
@router.get("/", response_model=SettingsDTO)
async def get_settings(user: User = Depends(get_authenticated_user)):
from cognee.modules.settings import get_settings as get_cognee_settings
return get_cognee_settings()
@router.post("/", response_model=None)
async def save_settings(new_settings: SettingsPayloadDTO, user: User = Depends(get_authenticated_user)):
from cognee.modules.settings import save_llm_config, save_vector_db_config
if new_settings.llm is not None:
await save_llm_config(new_settings.llm)
if new_settings.vector_db is not None:
await save_vector_db_config(new_settings.vector_db)
return router

View file

@ -89,10 +89,22 @@ class SQLAlchemyAdapter():
"""
Delete data in the given table based on id. The table must have an id column.
"""
async with self.get_async_session() as session:
TableModel = await self.get_table(table_name, schema_name)
await session.execute(TableModel.delete().where(TableModel.c.id == data_id))
await session.commit()
if self.engine.dialect.name == "sqlite":
async with self.get_async_session() as session:
TableModel = await self.get_table(table_name, schema_name)
# Foreign key constraints are disabled by default in SQLite (for backwards compatibility),
# so must be enabled for each database connection/session separately.
await session.execute(text("PRAGMA foreign_keys = ON;"))
await session.execute(TableModel.delete().where(TableModel.c.id == data_id))
await session.commit()
else:
async with self.get_async_session() as session:
TableModel = await self.get_table(table_name, schema_name)
await session.execute(TableModel.delete().where(TableModel.c.id == data_id))
await session.commit()
async def get_table(self, table_name: str, schema_name: Optional[str] = "public") -> Table:
"""

View file

@ -1,25 +0,0 @@
from typing import List, Optional
from fastembed import TextEmbedding
from cognee.root_dir import get_absolute_path
from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine
class FastembedEmbeddingEngine(EmbeddingEngine):
embedding_model: str
embedding_dimensions: int
def __init__(
self,
embedding_model: Optional[str] = "BAAI/bge-large-en-v1.5",
embedding_dimensions: Optional[int] = 1024,
):
self.embedding_model = embedding_model
self.embedding_dimensions = embedding_dimensions
async def embed_text(self, text: List[str]) -> List[float]:
embedding_model = TextEmbedding(model_name = self.embedding_model, cache_dir = get_absolute_path("cache/embeddings"))
embeddings_list = list(map(lambda embedding: embedding.tolist(), embedding_model.embed(text)))
return embeddings_list
def get_vector_size(self) -> int:
return self.embedding_dimensions

View file

@ -193,7 +193,10 @@ class LanceDBAdapter(VectorDBInterface):
async def delete_data_points(self, collection_name: str, data_point_ids: list[str]):
connection = await self.get_connection()
collection = await connection.open_table(collection_name)
results = await collection.delete(f"id IN {tuple(data_point_ids)}")
if len(data_point_ids) == 1:
results = await collection.delete(f"id = '{data_point_ids[0]}'")
else:
results = await collection.delete(f"id IN {tuple(data_point_ids)}")
return results
async def create_vector_index(self, index_name: str, index_property_name: str):
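A note on the single-id branch above: formatting a one-element Python tuple into the filter string yields a trailing comma (`('a1',)`) that the SQL-style `IN` predicate rejects, hence the equality fallback. A small standalone sketch of the resulting predicate strings (not the adapter itself):
```python
def delete_predicate(ids: list[str]) -> str:
    # tuple(["a1"]) renders as ('a1',) -> "id IN ('a1',)", which the filter parser rejects,
    # so a single id falls back to a plain equality filter.
    if len(ids) == 1:
        return f"id = '{ids[0]}'"
    return f"id IN {tuple(ids)}"

print(delete_predicate(["a1"]))        # id = 'a1'
print(delete_predicate(["a1", "b2"]))  # id IN ('a1', 'b2')
```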

View file

@ -33,7 +33,6 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface):
self.api_key = api_key
self.embedding_engine = embedding_engine
self.db_uri: str = connection_string
self.engine = create_async_engine(self.db_uri)
self.sessionmaker = async_sessionmaker(bind=self.engine, expire_on_commit=False)

View file

@ -1,7 +1,8 @@
from typing import BinaryIO
from pypdf import PdfReader
import filetype
def extract_text_from_file(file: BinaryIO, file_type) -> str:
def extract_text_from_file(file: BinaryIO, file_type: filetype.Type) -> str:
"""Extract text from a file"""
if file_type.extension == "pdf":
reader = PdfReader(stream = file)

View file

@ -1,5 +0,0 @@
import os
def get_file_size(file_path: str):
"""Get the size of a file"""
return os.path.getsize(file_path)

View file

@ -1,4 +1,3 @@
import dsp
import dspy
from dspy.evaluate.evaluate import Evaluate
from dspy.primitives.example import Example

View file

@ -1,4 +1,3 @@
import dsp
import dspy
from dspy.teleprompt import BootstrapFewShot
from dspy.primitives.example import Example

View file

@ -6,6 +6,8 @@ from .get_dataset import get_dataset
from .get_datasets import get_datasets
from .get_datasets_by_name import get_datasets_by_name
from .get_dataset_data import get_dataset_data
from .get_data import get_data
# Delete
from .delete_dataset import delete_dataset
from .delete_data import delete_data

View file

@ -0,0 +1,19 @@
from cognee.modules.data.models import Data
from cognee.infrastructure.databases.relational import get_relational_engine
async def delete_data(data: Data):
"""Delete a data record from the database.
Args:
data (Data): The data object to be deleted.
Raises:
ValueError: If the data object is invalid.
"""
if not hasattr(data, '__tablename__'):
raise ValueError("The provided data object is missing the required '__tablename__' attribute.")
db_engine = get_relational_engine()
return await db_engine.delete_data_by_id(data.__tablename__, data.id)

View file

@ -4,4 +4,4 @@ from cognee.infrastructure.databases.relational import get_relational_engine
async def delete_dataset(dataset: Dataset):
db_engine = get_relational_engine()
return await db_engine.delete_table(dataset.id)
return await db_engine.delete_data_by_id(dataset.__tablename__, dataset.id)

View file

@ -0,0 +1,20 @@
from uuid import UUID
from typing import Optional
from cognee.infrastructure.databases.relational import get_relational_engine
from ..models import Data
async def get_data(data_id: UUID) -> Optional[Data]:
"""Retrieve data by ID.
Args:
data_id (UUID): ID of the data to retrieve
Returns:
Optional[Data]: The requested data object if found, None otherwise
"""
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
data = await session.get(Data, data_id)
return data

View file

@ -1,8 +1,9 @@
from typing import Optional
from uuid import UUID
from cognee.infrastructure.databases.relational import get_relational_engine
from ..models import Dataset
async def get_dataset(user_id: UUID, dataset_id: UUID) -> Dataset:
async def get_dataset(user_id: UUID, dataset_id: UUID) -> Optional[Dataset]:
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:

View file

@ -20,9 +20,11 @@ class Data(Base):
updated_at = Column(DateTime(timezone = True), onupdate = lambda: datetime.now(timezone.utc))
datasets: Mapped[List["Dataset"]] = relationship(
"Dataset",
secondary = DatasetData.__tablename__,
back_populates = "data",
lazy = "noload",
cascade="all, delete"
)
def to_json(self) -> dict:

View file

@ -19,9 +19,11 @@ class Dataset(Base):
owner_id = Column(UUID, index = True)
data: Mapped[List["Data"]] = relationship(
"Data",
secondary = DatasetData.__tablename__,
back_populates = "datasets",
lazy = "noload",
cascade="all, delete"
)
def to_json(self) -> dict:

View file

@ -7,5 +7,5 @@ class DatasetData(Base):
created_at = Column(DateTime(timezone = True), default = lambda: datetime.now(timezone.utc))
dataset_id = Column(UUID, ForeignKey("datasets.id"), primary_key = True)
data_id = Column(UUID, ForeignKey("data.id"), primary_key = True)
dataset_id = Column(UUID, ForeignKey("datasets.id", ondelete="CASCADE"), primary_key = True)
data_id = Column(UUID, ForeignKey("data.id", ondelete="CASCADE"), primary_key = True)

View file

@ -5,7 +5,7 @@ from .models.Task import Task
class PipelineConfig(BaseModel):
batch_count: int = 10
description: Optional[str]
description: Optional[str] = None
class Pipeline():
id: UUID = uuid4()

View file

@ -1,8 +1,8 @@
from typing import Any, Callable, Generator
from typing import Any, Callable, Generator, List
import asyncio
from ..tasks.Task import Task
def run_tasks_parallel(tasks: [Task]) -> Callable[[Any], Generator[Any, Any, Any]]:
def run_tasks_parallel(tasks: List[Task]) -> Callable[[Any], Generator[Any, Any, Any]]:
async def parallel_run(*args, **kwargs):
parallel_tasks = [asyncio.create_task(task.run(*args, **kwargs)) for task in tasks]

View file

@ -18,7 +18,7 @@ class Directory(BaseModel):
directories: List['Directory'] = []
# Allows recursive Directory Model
Directory.update_forward_refs()
Directory.model_rebuild()
class RepositoryProperties(BaseModel):
custom_properties: Optional[Dict[str, Any]] = None

View file

@ -6,15 +6,15 @@ class BaseClass(BaseModel):
name: str
type: Literal["Class"] = "Class"
description: str
constructor_parameters: Optional[List[str]]
constructor_parameters: Optional[List[str]] = None
class Class(BaseModel):
id: str
name: str
type: Literal["Class"] = "Class"
description: str
constructor_parameters: Optional[List[str]]
from_class: Optional[BaseClass]
constructor_parameters: Optional[List[str]] = None
from_class: Optional[BaseClass] = None
class ClassInstance(BaseModel):
id: str
@ -28,7 +28,7 @@ class Function(BaseModel):
name: str
type: Literal["Function"] = "Function"
description: str
parameters: Optional[List[str]]
parameters: Optional[List[str]] = None
return_type: str
is_static: Optional[bool] = False
@ -38,7 +38,7 @@ class Variable(BaseModel):
type: Literal["Variable"] = "Variable"
description: str
is_static: Optional[bool] = False
default_value: Optional[str]
default_value: Optional[str] = None
class Operator(BaseModel):
id: str

View file

@ -21,7 +21,6 @@ async def chunk_naive_llm_classifier(data_chunks: list[DocumentChunk], classific
for chunk_index, chunk in enumerate(data_chunks):
chunk_classification = chunk_classifications[chunk_index]
classification_data_points.append(uuid5(NAMESPACE_OID, chunk_classification.label.type))
classification_data_points.append(uuid5(NAMESPACE_OID, chunk_classification.label.type))
for classification_subclass in chunk_classification.label.subclass:
classification_data_points.append(uuid5(NAMESPACE_OID, classification_subclass.value))
@ -39,7 +38,7 @@ async def chunk_naive_llm_classifier(data_chunks: list[DocumentChunk], classific
if await vector_engine.has_collection(collection_name):
existing_data_points = await vector_engine.retrieve(
collection_name,
list(set(classification_data_points)),
[str(classification_data) for classification_data in list(set(classification_data_points))],
) if len(classification_data_points) > 0 else []
existing_points_map = {point.id: True for point in existing_data_points}
@ -60,7 +59,7 @@ async def chunk_naive_llm_classifier(data_chunks: list[DocumentChunk], classific
data_points.append(
DataPoint[Keyword](
id=str(classification_type_id),
payload=Keyword.parse_obj({
payload=Keyword.model_validate({
"uuid": str(classification_type_id),
"text": classification_type_label,
"chunk_id": str(data_chunk.chunk_id),
@ -99,7 +98,7 @@ async def chunk_naive_llm_classifier(data_chunks: list[DocumentChunk], classific
data_points.append(
DataPoint[Keyword](
id=str(classification_subtype_id),
payload=Keyword.parse_obj({
payload=Keyword.model_validate({
"uuid": str(classification_subtype_id),
"text": classification_subtype_label,
"chunk_id": str(data_chunk.chunk_id),

View file

@ -56,7 +56,7 @@ class OntologyEngine:
for item in items:
flat_list.extend(await self.recursive_flatten(item, parent_id))
elif isinstance(items, dict):
model = NodeModel.parse_obj(items)
model = NodeModel.model_validate(items)
flat_list.append(await self.flatten_model(model, parent_id))
for child in model.children:
flat_list.extend(await self.recursive_flatten(child, model.node_id))
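The `parse_obj` → `model_validate` edits here (like the `update_forward_refs` → `model_rebuild` change earlier) track the pydantic v1 → v2 API renames that come with the `pydantic = "2.8.2"` pin. A tiny standalone illustration with a hypothetical model:
```python
from pydantic import BaseModel

class Keyword(BaseModel):      # hypothetical stand-in, not the project's model
    uuid: str
    text: str

payload = {"uuid": "123", "text": "NLP"}
kw = Keyword.model_validate(payload)   # pydantic v2
# kw = Keyword.parse_obj(payload)      # pydantic v1 spelling, deprecated in v2
print(kw.text)
```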

View file

@ -0,0 +1,31 @@
from typing import Any, Dict, List, Optional, Union
from pydantic import BaseModel, Field
class RelationshipModel(BaseModel):
type: str
source: str
target: str
class NodeModel(BaseModel):
node_id: str
name: str
default_relationship: Optional[RelationshipModel] = None
children: List[Union[Dict[str, Any], "NodeModel"]] = Field(default_factory=list)
NodeModel.model_rebuild()
class OntologyNode(BaseModel):
id: str = Field(..., description = "Unique identifier made from node name.")
name: str
description: str
class OntologyEdge(BaseModel):
id: str
source_id: str
target_id: str
relationship_type: str
class GraphOntology(BaseModel):
nodes: list[OntologyNode]
edges: list[OntologyEdge]

View file

@ -49,7 +49,7 @@ async def main():
search_results = await cognee.search(SearchType.SUMMARIES, query = random_node_name)
assert len(search_results) != 0, "Query related summaries don't exist."
print("\n\Extracted summaries are:\n")
print("\nExtracted summaries are:\n")
for result in search_results:
print(f"{result}\n")

View file

@ -53,7 +53,7 @@ async def main():
search_results = await cognee.search(SearchType.SUMMARIES, query = random_node_name)
assert len(search_results) != 0, "Query related summaries don't exist."
print("\n\Extracted summaries are:\n")
print("\nExtracted summaries are:\n")
for result in search_results:
print(f"{result}\n")

View file

@ -54,7 +54,7 @@ async def main():
search_results = await cognee.search(SearchType.SUMMARIES, query = random_node_name)
assert len(search_results) != 0, "Query related summaries don't exist."
print("\n\Extracted summaries are:\n")
print("\nExtracted summaries are:\n")
for result in search_results:
print(f"{result}\n")

View file

@ -52,7 +52,7 @@ async def main():
search_results = await cognee.search(SearchType.SUMMARIES, query = random_node_name)
assert len(search_results) != 0, "Query related summaries don't exist."
print("\n\Extracted summaries are:\n")
print("\nExtracted summaries are:\n")
for result in search_results:
print(f"{result}\n")

View file

@ -14,9 +14,11 @@ Check available configuration options:
from cognee.infrastructure.databases.vector import get_vectordb_config
from cognee.infrastructure.databases.graph.config import get_graph_config
from cognee.infrastructure.databases.relational import get_relational_config
from cognee.infrastructure.llm.config import get_llm_config
print(get_vectordb_config().to_dict())
print(get_graph_config().to_dict())
print(get_relational_config().to_dict())
print(get_llm_config().to_dict())
```
@ -29,8 +31,7 @@ GRAPH_DATABASE_PROVIDER = 'lancedb'
Otherwise, you can set the configuration yourself:
```python
cognee.config.llm_provider = 'ollama'
cognee.config.set_llm_provider('ollama')
```
## 🚀 Getting Started with Local Models
@ -52,15 +53,14 @@ LLM_PROVIDER = 'ollama'
Otherwise, you can set the configuration for the model:
```python
cognee.config.llm_provider = 'ollama'
cognee.config.set_llm_provider('ollama')
```
You can also set the HOST and model name:
```python
cognee.config.llm_endpoint = "http://localhost:11434/v1"
cognee.config.llm_model = "mistral:instruct"
cognee.config.set_llm_endpoint("http://localhost:11434/v1")
cognee.config.set_llm_model("mistral:instruct")
```
@ -73,7 +73,7 @@ LLM_PROVIDER = 'custom'
Otherwise, you can set the configuration for the model:
```python
cognee.config.llm_provider = 'custom'
cognee.config.set_llm_provider('custom')
```
You can also set the HOST and model name:

View file

@ -0,0 +1,39 @@
import cognee
import asyncio
from cognee.api.v1.search import SearchType
# Prerequisites:
# 1. Copy `.env.template` and rename it to `.env`.
# 2. Add your OpenAI API key to the `.env` file in the `LLM_API_KEY` field:
# LLM_API_KEY = "your_key_here"
# 3. (Optional) To minimize setup effort, set `VECTOR_DB_PROVIDER="lancedb"` in `.env`.
async def main():
# Create a clean slate for cognee -- reset data and system state
await cognee.prune.prune_data()
await cognee.prune.prune_system(metadata=True)
# cognee knowledge graph will be created based on this text
text = """
Natural language processing (NLP) is an interdisciplinary
subfield of computer science and information retrieval.
"""
# Add the text, and make it available for cognify
await cognee.add(text)
# Use LLMs and cognee to create knowledge graph
await cognee.cognify()
# Query cognee for insights on the added text
search_results = await cognee.search(
SearchType.INSIGHTS,
{'query': 'Tell me about NLP'}
)
# Display search results
for result_text in search_results:
print(result_text)
if __name__ == '__main__':
asyncio.run(main())

poetry.lock (generated)

File diff suppressed because it is too large.

View file

@ -19,60 +19,57 @@ classifiers = [
[tool.poetry.dependencies]
python = ">=3.9.0,<3.12"
openai = "1.27.0"
openai = "1.52.0"
pydantic = "2.8.2"
python-dotenv = "1.0.1"
fastapi = "^0.109.2"
uvicorn = "0.22.0"
requests = "2.32.3"
aiohttp = "3.10.10"
typing_extensions = "4.12.2"
dspy = "2.5.25"
nest_asyncio = "1.6.0"
numpy = "1.26.4"
datasets = "3.1.0"
falkordb = "1.0.9"
boto3 = "^1.26.125"
botocore="^1.35.54"
gunicorn = "^20.1.0"
sqlalchemy = "2.0.35"
instructor = "1.3.5"
instructor = "1.6.3"
networkx = "^3.2.1"
debugpy = "1.8.2"
pyarrow = "15.0.0"
pylint = "^3.0.3"
aiosqlite = "^0.20.0"
pandas = "2.0.3"
greenlet = "^3.0.3"
ruff = "^0.2.2"
filetype = "^1.2.0"
nltk = "^3.8.1"
dlt = {extras = ["sqlalchemy"], version = "^1.2.0"}
overrides = "^7.7.0"
aiofiles = "^23.2.1"
qdrant-client = "^1.9.0"
graphistry = "^0.33.5"
tenacity = "^8.2.3"
tenacity = "^9.0.0"
weaviate-client = "4.6.7"
scikit-learn = "^1.5.0"
fastembed = "0.2.7"
pypdf = "^4.1.0"
neo4j = "^5.20.0"
jinja2 = "^3.1.3"
matplotlib = "^3.8.3"
structlog = "^24.1.0"
tiktoken = "0.7.0"
langchain_text_splitters = "0.3.2"
langsmith = "0.1.139"
langdetect = "1.0.9"
posthog = "^3.5.0"
lancedb = "0.15.0"
litellm = "1.38.10"
litellm = "1.49.1"
groq = "0.8.0"
tantivy = "^0.22.0"
tokenizers ="0.15.2"
transformers ="4.39.0"
python-multipart = "^0.0.9"
langfuse = "^2.32.0"
protobuf = "<5.0.0"
pydantic-settings = "^2.2.1"
anthropic = "^0.26.1"
pdfplumber = "^0.11.1"
sentry-sdk = {extras = ["fastapi"], version = "^2.9.0"}
fastapi-users = { version = "*", extras = ["sqlalchemy"] }
fastapi-users = {version = "*", extras = ["sqlalchemy"]}
asyncpg = "^0.29.0"
alembic = "^1.13.3"
pgvector = "^0.3.5"
psycopg2 = {version = "^2.9.10", optional = true}
falkordb = "^1.0.9"
[tool.poetry.extras]
filesystem = ["s3fs", "botocore"]
@ -89,6 +86,11 @@ pytest-asyncio = "^0.21.1"
coverage = "^7.3.2"
mypy = "^1.7.1"
notebook = "^7.1.1"
deptry = "^0.20.0"
debugpy = "1.8.2"
pylint = "^3.0.3"
ruff = "^0.2.2"
tweepy = "4.14.0"
[tool.poetry.group.docs.dependencies]
mkdocs-material = "^9.5.42"