<!-- .github/pull_request_template.md -->

## Description
<!--
Please provide a clear, human-generated description of the changes in this PR.
DO NOT use AI-generated descriptions. We want to understand your thought process and reasoning.
-->

Update default tutorial:
1. Use the tutorial from the [notebook_tutorial branch](https://github.com/topoteretes/cognee/blob/notebook_tutorial/notebooks/tutorial.ipynb) — specifically, its .zip version with all necessary data files
2. Use Jupyter Notebook `Notebook` abstractions to read `ipynb` files and map them into our Notebook model
3. Dynamically update starter notebook code blocks that reference starter data files, swapping those references for local paths to the downloaded copies
4. Test coverage

| Before | After (storage backend = local) | After (s3) |
|--------|---------------------------------|------------|
| <img width="613" height="546" alt="Screenshot 2025-09-17 at 01 00 58" src="https://github.com/user-attachments/assets/20b59021-96c1-4a83-977f-e064324bd758" /> | <img width="1480" height="262" alt="Screenshot 2025-09-18 at 13 01 57" src="https://github.com/user-attachments/assets/bd56ea78-7c6a-42e3-ae3f-4157da231b2d" /> | <img width="1485" height="307" alt="Screenshot 2025-09-18 at 12 56 08" src="https://github.com/user-attachments/assets/248ae720-4c78-445a-ba8b-8a2991ed3f80" /> |

## File Replacements

### S3 Demo
https://github.com/user-attachments/assets/bd46eec9-ef77-4f69-9ef0-e7d1612ff9b3

---

### Local FS Demo
https://github.com/user-attachments/assets/8251cea0-81b3-4cac-a968-9576c358f334

## Type of Change
<!-- Please check the relevant option -->
- [ ] Bug fix (non-breaking change that fixes an issue)
- [x] New feature (non-breaking change that adds functionality)
- [ ] Breaking change (fix or feature that would cause existing functionality to change)
- [ ] Documentation update
- [ ] Code refactoring
- [ ] Performance improvement
- [ ] Other (please specify):

## Changes Made
<!-- List the specific changes made in this PR -->
-
-
-

## Testing
<!-- Describe how you tested your changes -->

## Screenshots/Videos (if applicable)
<!-- Add screenshots or videos to help explain your changes -->

## Pre-submission Checklist
<!-- Please check all boxes that apply before submitting your PR -->
- [ ] **I have tested my changes thoroughly before submitting this PR**
- [ ] **This PR contains minimal changes necessary to address the issue/feature**
- [ ] My code follows the project's coding standards and style guidelines
- [ ] I have added tests that prove my fix is effective or that my feature works
- [ ] I have added necessary documentation (if applicable)
- [ ] All new and existing tests pass
- [ ] I have searched existing PRs to ensure this change hasn't been submitted already
- [ ] I have linked any relevant issues in the description
- [ ] My commits have clear and descriptive messages

## Related Issues
<!-- Link any related issues using "Fixes #issue_number" or "Relates to #issue_number" -->

## Additional Notes
<!-- Add any additional notes, concerns, or context for reviewers -->

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.
171 lines
5.5 KiB
Python
171 lines
5.5 KiB
Python
import inspect
|
|
from typing import BinaryIO
|
|
from contextlib import asynccontextmanager
|
|
|
|
from .storage import Storage
|
|
|
|
|
|
class StorageManager:
    """
    Manages storage operations by delegating tasks to a storage backend.

    Every public method is awaitable (``open`` is an async context manager) whether the
    backend implements the corresponding operation as a coroutine function or as a plain
    function; the manager inspects the backend method and awaits it only when required.

    Public methods include:
    - file_exists: Check whether a file exists at the specified path.
    - is_file: Delegate to the backend's ``is_file`` check for the specified path.
    - get_size: Delegate to the backend's ``get_size`` for the specified path.
    - store: Store data in the specified path.
    - open: Open a file from the specified path.
    - ensure_directory_exists: Create the directory if it does not exist yet.
    - remove: Remove the file at the specified path.
    - list_files: List the files in a directory.
    - remove_all: Remove all files under the directory tree.
    """

    storage: Storage = None

    def __init__(self, storage: Storage):
        self.storage = storage

    async def _call_storage(self, method_name: str, *args):
        """
        Invoke ``method_name`` on the backend, awaiting it when it is a coroutine function.

        Parameters:
        -----------

            - method_name (str): Name of the backend method to call.
            - *args: Positional arguments forwarded unchanged to the backend method.

        Returns:
        --------

            Whatever the backend method returns (the awaited result for coroutine functions).
        """
        backend_method = getattr(self.storage, method_name)

        if inspect.iscoroutinefunction(backend_method):
            return await backend_method(*args)

        return backend_method(*args)

    async def file_exists(self, file_path: str):
        """
        Check if a specified file exists in the storage.

        Parameters:
        -----------

            - file_path (str): The path of the file to check for existence.

        Returns:
        --------

            - bool: True if the file exists, otherwise False.
        """
        return await self._call_storage("file_exists", file_path)

    async def is_file(self, file_path: str):
        """
        Ask the storage backend whether the specified path is a file.

        Parameters:
        -----------

            - file_path (str): The path to check.

        Returns:
        --------

            Returns the result of the backend's `is_file`, as defined by the storage
            implementation.
        """
        return await self._call_storage("is_file", file_path)

    async def get_size(self, file_path: str) -> int:
        """
        Ask the storage backend for the size of the specified file.

        Parameters:
        -----------

            - file_path (str): The path of the file to measure.

        Returns:
        --------

            - int: The value returned by the backend's `get_size`.
        """
        return await self._call_storage("get_size", file_path)

    async def store(self, file_path: str, data: BinaryIO, overwrite: bool = False) -> str:
        """
        Store data at the specified file path.

        Parameters:
        -----------

            - file_path (str): The path where the data should be stored.
            - data (BinaryIO): The data in a binary format that needs to be stored.
            - overwrite (bool): If True, overwrite the existing file.

        Returns:
        --------

            Returns the full path to the file.
        """
        return await self._call_storage("store", file_path, data, overwrite)

    @asynccontextmanager
    async def open(self, file_path: str, encoding: str = None, *args, **kwargs):
        """
        Retrieve data from the specified file path.

        Parameters:
        -----------

            - file_path (str): The path from which to retrieve the data.
            - encoding (str): Accepted by this wrapper but NOT forwarded to the storage
              backend. NOTE(review): a second positional argument therefore binds to
              `encoding` and is dropped; pass backend options by keyword.
            - *args, **kwargs: Forwarded unchanged to the backend's `open`.

        Returns:
        --------

            Returns the retrieved data, as defined by the storage implementation.
        """
        opened = self.storage.open(file_path, *args, **kwargs)
        manager_type = type(opened)

        # S3FileStorage.open() is async. Any other backend whose open() returns an
        # async-only context manager is treated the same way, instead of failing
        # inside a plain `with` statement.
        needs_async_with = self.storage.__class__.__name__ == "S3FileStorage" or (
            hasattr(manager_type, "__aenter__") and not hasattr(manager_type, "__enter__")
        )

        if needs_async_with:
            async with opened as file:
                yield file
        else:
            # LocalFileStorage.open() is sync
            with opened as file:
                yield file

    async def ensure_directory_exists(self, directory_path: str = ""):
        """
        Ensure that the specified directory exists, creating it if necessary.

        If the directory already exists, no action is taken.

        Parameters:
        -----------

            - directory_path (str): The path of the directory to check or create.
        """
        return await self._call_storage("ensure_directory_exists", directory_path)

    async def remove(self, file_path: str):
        """
        Remove the file at the specified path.

        Parameters:
        -----------

            - file_path (str): The path of the file to be removed.

        Returns:
        --------

            Returns the outcome of the remove operation, as defined by the storage
            implementation.
        """
        return await self._call_storage("remove", file_path)

    async def list_files(self, directory_path: str, recursive: bool = False) -> list[str]:
        """
        List all files in the specified directory.

        Parameters:
        -----------
            - directory_path (str): The directory path to list files from
            - recursive (bool): If True, list files recursively in subdirectories

        Returns:
        --------
            - list[str]: List of file paths relative to the storage root
        """
        return await self._call_storage("list_files", directory_path, recursive)

    async def remove_all(self, tree_path: str = None):
        """
        Remove an entire directory tree at the specified path, including all files and
        subdirectories.

        If the directory does not exist, no action is taken and no exception is raised.

        Parameters:
        -----------

            - tree_path (str): The root path of the directory tree to be removed.
        """
        return await self._call_storage("remove_all", tree_path)
|