From 94bf7b4c30a0ff84f73e427e9922882a4e0eb4d2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 27 Aug 2025 21:49:40 +0000 Subject: [PATCH 1/4] chore(deps): bump the pip group across 2 directories with 1 update Updates the requirements on [pypdf](https://github.com/py-pdf/pypdf) to permit the latest version. Updates `pypdf` to 6.0.0 - [Release notes](https://github.com/py-pdf/pypdf/releases) - [Changelog](https://github.com/py-pdf/pypdf/blob/main/CHANGELOG.md) - [Commits](https://github.com/py-pdf/pypdf/compare/4.1.0...6.0.0) Updates `pypdf` to 6.0.0 - [Release notes](https://github.com/py-pdf/pypdf/releases) - [Changelog](https://github.com/py-pdf/pypdf/blob/main/CHANGELOG.md) - [Commits](https://github.com/py-pdf/pypdf/compare/4.1.0...6.0.0) --- updated-dependencies: - dependency-name: pypdf dependency-version: 6.0.0 dependency-type: direct:production dependency-group: pip - dependency-name: pypdf dependency-version: 6.0.0 dependency-type: direct:production dependency-group: pip ... 
Signed-off-by: dependabot[bot] --- distributed/pyproject.toml | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/distributed/pyproject.toml b/distributed/pyproject.toml index fc04c6fcc..f61157236 100644 --- a/distributed/pyproject.toml +++ b/distributed/pyproject.toml @@ -40,7 +40,7 @@ dependencies = [ "aiohttp>=3.11.14,<4.0.0", "aiofiles>=23.2.1,<24.0.0", "rdflib>=7.1.4,<7.2.0", - "pypdf>=4.1.0,<6.0.0", + "pypdf>=4.1.0,<7.0.0", "jinja2>=3.1.3,<4", "matplotlib>=3.8.3,<4", "networkx>=3.4.2,<4", diff --git a/pyproject.toml b/pyproject.toml index 272c8e929..5e22b1eb1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,7 +41,7 @@ dependencies = [ "aiohttp>=3.11.14,<4.0.0", "aiofiles>=23.2.1,<24.0.0", "rdflib>=7.1.4,<7.2.0", - "pypdf>=4.1.0,<6.0.0", + "pypdf>=4.1.0,<7.0.0", "jinja2>=3.1.3,<4", "matplotlib>=3.8.3,<4", "networkx>=3.4.2,<4", From 5fa5bfa68211f645bef20de54e9e73e5408a9c19 Mon Sep 17 00:00:00 2001 From: Mohammad Date: Wed, 10 Sep 2025 14:11:00 +0200 Subject: [PATCH 2/4] feat: add support for AWS session token in S3 configuration --- cognee/infrastructure/files/storage/S3FileStorage.py | 8 +++++++- cognee/infrastructure/files/storage/s3_config.py | 2 +- cognee/tasks/ingestion/resolve_data_directories.py | 2 +- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/cognee/infrastructure/files/storage/S3FileStorage.py b/cognee/infrastructure/files/storage/S3FileStorage.py index 7c5a1033c..a0d611241 100644 --- a/cognee/infrastructure/files/storage/S3FileStorage.py +++ b/cognee/infrastructure/files/storage/S3FileStorage.py @@ -21,10 +21,11 @@ class S3FileStorage(Storage): def __init__(self, storage_path: str): self.storage_path = storage_path s3_config = get_s3_config() - if s3_config.aws_access_key_id is not None and s3_config.aws_secret_access_key is not None: + if s3_config.aws_access_key_id is not None and s3_config.aws_secret_access_key is not None and s3_config.aws_session_token is not None: self.s3 = 
s3fs.S3FileSystem( key=s3_config.aws_access_key_id, secret=s3_config.aws_secret_access_key, + token=s3_config.aws_session_token, anon=False, endpoint_url=s3_config.aws_endpoint_url, client_kwargs={"region_name": s3_config.aws_region}, @@ -146,6 +147,11 @@ class S3FileStorage(Storage): self.s3.isfile, os.path.join(self.storage_path.replace("s3://", ""), file_path) ) + async def get_size(self, file_path: str) -> int: + return await run_async( + self.s3.size, os.path.join(self.storage_path.replace("s3://", ""), file_path) + ) + async def ensure_directory_exists(self, directory_path: str = ""): """ Ensure that the specified directory exists, creating it if necessary. diff --git a/cognee/infrastructure/files/storage/s3_config.py b/cognee/infrastructure/files/storage/s3_config.py index 0b9372b7e..3b59bcd57 100644 --- a/cognee/infrastructure/files/storage/s3_config.py +++ b/cognee/infrastructure/files/storage/s3_config.py @@ -8,9 +8,9 @@ class S3Config(BaseSettings): aws_endpoint_url: Optional[str] = None aws_access_key_id: Optional[str] = None aws_secret_access_key: Optional[str] = None + aws_session_token: Optional[str] = None model_config = SettingsConfigDict(env_file=".env", extra="allow") - @lru_cache def get_s3_config(): return S3Config() diff --git a/cognee/tasks/ingestion/resolve_data_directories.py b/cognee/tasks/ingestion/resolve_data_directories.py index 1d3124a0c..cbd979e16 100644 --- a/cognee/tasks/ingestion/resolve_data_directories.py +++ b/cognee/tasks/ingestion/resolve_data_directories.py @@ -32,7 +32,7 @@ async def resolve_data_directories( import s3fs fs = s3fs.S3FileSystem( - key=s3_config.aws_access_key_id, secret=s3_config.aws_secret_access_key, anon=False + key=s3_config.aws_access_key_id, secret=s3_config.aws_secret_access_key,token=s3_config.aws_session_token, anon=False ) for item in data: From e2ed2793140a6993d500f823bf656f5ba93af6cd Mon Sep 17 00:00:00 2001 From: Mohammad Date: Wed, 10 Sep 2025 14:14:22 +0200 Subject: [PATCH 3/4] feat: add 
support for AWS session token in S3 configuration --- cognee/infrastructure/files/storage/s3_config.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cognee/infrastructure/files/storage/s3_config.py b/cognee/infrastructure/files/storage/s3_config.py index 3b59bcd57..cefe5cd2f 100644 --- a/cognee/infrastructure/files/storage/s3_config.py +++ b/cognee/infrastructure/files/storage/s3_config.py @@ -11,6 +11,7 @@ class S3Config(BaseSettings): aws_session_token: Optional[str] = None model_config = SettingsConfigDict(env_file=".env", extra="allow") + @lru_cache def get_s3_config(): return S3Config() From a0c951336e22a6ee14036c3e51527e2b1723f9ed Mon Sep 17 00:00:00 2001 From: Mohammad Date: Wed, 10 Sep 2025 14:20:42 +0200 Subject: [PATCH 4/4] feat: add support for AWS session token in S3 configuration --- cognee/infrastructure/files/storage/S3FileStorage.py | 6 +++++- cognee/tasks/ingestion/resolve_data_directories.py | 5 ++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/cognee/infrastructure/files/storage/S3FileStorage.py b/cognee/infrastructure/files/storage/S3FileStorage.py index a0d611241..078d5fe2a 100644 --- a/cognee/infrastructure/files/storage/S3FileStorage.py +++ b/cognee/infrastructure/files/storage/S3FileStorage.py @@ -21,7 +21,11 @@ class S3FileStorage(Storage): def __init__(self, storage_path: str): self.storage_path = storage_path s3_config = get_s3_config() - if s3_config.aws_access_key_id is not None and s3_config.aws_secret_access_key is not None and s3_config.aws_session_token is not None: + if ( + s3_config.aws_access_key_id is not None + and s3_config.aws_secret_access_key is not None + and s3_config.aws_session_token is not None + ): self.s3 = s3fs.S3FileSystem( key=s3_config.aws_access_key_id, secret=s3_config.aws_secret_access_key, diff --git a/cognee/tasks/ingestion/resolve_data_directories.py b/cognee/tasks/ingestion/resolve_data_directories.py index cbd979e16..aa2f95303 100644 --- 
a/cognee/tasks/ingestion/resolve_data_directories.py +++ b/cognee/tasks/ingestion/resolve_data_directories.py @@ -32,7 +32,10 @@ async def resolve_data_directories( import s3fs fs = s3fs.S3FileSystem( - key=s3_config.aws_access_key_id, secret=s3_config.aws_secret_access_key,token=s3_config.aws_session_token, anon=False + key=s3_config.aws_access_key_id, + secret=s3_config.aws_secret_access_key, + token=s3_config.aws_session_token, + anon=False, ) for item in data: