Feature: Makes s3 pathway imports optional so cognee can run without s3fs (#978)

<!-- .github/pull_request_template.md -->

## Description
Makes s3 pathway imports optional so cognee can run without s3fs

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.
This commit is contained in:
hajdul88 2025-06-13 08:53:30 +02:00 committed by GitHub
parent 842fb8112f
commit 21a4217301
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 79 additions and 188 deletions

View file

@ -201,6 +201,10 @@ jobs:
with: with:
python-version: '3.11.x' python-version: '3.11.x'
- name: Install specific S3 dependency
run: |
poetry install -E aws
- name: Run S3 Bucket Test - name: Run S3 Bucket Test
env: env:
ENV: 'dev' ENV: 'dev'
@ -243,6 +247,4 @@ jobs:
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
run: poetry run python ./cognee/tests/test_parallel_databases.py run: poetry run python ./cognee/tests/test_parallel_databases.py

View file

@ -1,60 +0,0 @@
import os
import sys
import boto3
from dotenv import load_dotenv
# Absolute path of the directory that contains this script
current_dir = os.path.dirname(os.path.abspath(__file__))
# Parent of the script's directory
parent_dir = os.path.dirname(current_dir)
# Put the parent directory at the front of sys.path so it is searched first on import
sys.path.insert(0, parent_dir)
# Environment name read from AWS_ENV (defaults to "dev"); used to build the secret name below
environment = os.getenv("AWS_ENV", "dev")
def fetch_secret(secret_name: str, region_name: str, env_file_path: str):
    """Fetch a secret from AWS Secrets Manager and write it to a .env file.

    Parameters:
        secret_name (str): Identifier passed to ``get_secret_value`` as ``SecretId``.
        region_name (str): AWS region used to create the Secrets Manager client.
        env_file_path (str): Path of the file the secret payload is written to
            and then loaded from.

    Returns:
        str | None: An error message if the secret could not be retrieved,
        otherwise None.
    """
    print("Initializing session")
    session = boto3.session.Session()
    print("Session initialized")
    client = session.client(service_name="secretsmanager", region_name=region_name)
    print("Client initialized")

    try:
        response = client.get_secret_value(SecretId=secret_name)
    except Exception as e:
        # Best-effort contract: report the failure to the caller as a message
        # instead of raising.
        error_message = f"Error retrieving secret: {e}"
        print(error_message)
        return error_message

    if "SecretString" in response:
        secret = response["SecretString"]
    else:
        secret = response["SecretBinary"]

    # A binary secret arrives as bytes; the file below is opened in text mode,
    # so decode first instead of failing with a TypeError on write.
    if isinstance(secret, (bytes, bytearray)):
        secret = secret.decode("utf-8")

    with open(env_file_path, "w", encoding="utf-8") as env_file:
        env_file.write(secret)
    print("Secrets are added to the .env file.")

    if os.path.exists(env_file_path):
        print(f"The .env file is located at: {env_file_path}")
        # Load the file that was just written rather than whichever .env
        # the default lookup happens to find.
        load_dotenv(env_file_path)
        print("The .env file is loaded.")
    else:
        print(f"The .env file was not found at: {env_file_path}.")
# Location of the .env file, resolved relative to the current working directory
ENV_FILE_PATH = os.path.abspath("../.env")

if not os.path.exists(ENV_FILE_PATH):
    # No local .env yet: pull the environment-specific secret and write it there
    fetch_secret(
        f"promethai-{environment}-backend-secretso-promethaijs-dotenv",
        "eu-west-1",
        ENV_FILE_PATH,
    )
else:
    # A .env file already exists: load default environment variables (.env)
    load_dotenv()
    print("Environment variables are already loaded.")

View file

@ -1,4 +1,3 @@
import s3fs
from typing import IO, Optional from typing import IO, Optional
from cognee.api.v1.add.config import get_s3_config from cognee.api.v1.add.config import get_s3_config
@ -9,6 +8,8 @@ def open_data_file(
if file_path.startswith("s3://"): if file_path.startswith("s3://"):
s3_config = get_s3_config() s3_config = get_s3_config()
if s3_config.aws_access_key_id is not None and s3_config.aws_secret_access_key is not None: if s3_config.aws_access_key_id is not None and s3_config.aws_secret_access_key is not None:
import s3fs
fs = s3fs.S3FileSystem( fs = s3fs.S3FileSystem(
key=s3_config.aws_access_key_id, secret=s3_config.aws_secret_access_key, anon=False key=s3_config.aws_access_key_id, secret=s3_config.aws_secret_access_key, anon=False
) )

View file

@ -1,22 +1,40 @@
from io import BufferedReader from io import BufferedReader
from typing import Union, BinaryIO, Optional from typing import Union, BinaryIO, Optional, Any
from .data_types import TextData, BinaryData, S3BinaryData from .data_types import TextData, BinaryData
from tempfile import SpooledTemporaryFile from tempfile import SpooledTemporaryFile
from s3fs.core import S3File, S3FileSystem
from cognee.modules.ingestion.exceptions import IngestionError from cognee.modules.ingestion.exceptions import IngestionError
try:
from s3fs.core import S3File
from cognee.modules.ingestion.data_types.S3BinaryData import S3BinaryData
except ImportError:
S3File = None
S3BinaryData = None
def classify(data: Union[str, BinaryIO], filename: str = None, s3fs: Optional[S3FileSystem] = None):
def classify(data: Union[str, BinaryIO], filename: str = None, s3fs: Optional[Any] = None):
if isinstance(data, str): if isinstance(data, str):
return TextData(data) return TextData(data)
if isinstance(data, BufferedReader) or isinstance(data, SpooledTemporaryFile): if isinstance(data, BufferedReader) or isinstance(data, SpooledTemporaryFile):
return BinaryData(data, str(data.name).split("/")[-1] if data.name else filename) return BinaryData(data, str(data.name).split("/")[-1] if data.name else filename)
if isinstance(data, S3File): try:
derived_filename = str(data.full_name).split("/")[-1] if data.full_name else filename from importlib import import_module
return S3BinaryData(s3_path=data.full_name, name=derived_filename, s3=s3fs)
s3core = import_module("s3fs.core")
S3File = s3core.S3File
except ImportError:
S3File = None
if S3File is not None:
from cognee.modules.ingestion.data_types.S3BinaryData import S3BinaryData
if isinstance(data, S3File):
derived_filename = str(data.full_name).split("/")[-1] if data.full_name else filename
return S3BinaryData(s3_path=data.full_name, name=derived_filename, s3=s3fs)
raise IngestionError( raise IngestionError(
message=f"Type of data sent to classify(data: Union[str, BinaryIO) not supported: {type(data)}" message=f"Type of data sent to classify(data: Union[str, BinaryIO) not supported or s3fs is not installed: {type(data)}"
) )

View file

@ -1,4 +1,3 @@
from .TextData import TextData, create_text_data from .TextData import TextData, create_text_data
from .BinaryData import BinaryData, create_binary_data from .BinaryData import BinaryData, create_binary_data
from .S3BinaryData import S3BinaryData, create_s3_binary_data
from .IngestionData import IngestionData from .IngestionData import IngestionData

View file

@ -1,5 +1,3 @@
from .translate_text import translate_text
from .detect_language import detect_language
from .classify_documents import classify_documents from .classify_documents import classify_documents
from .extract_chunks_from_documents import extract_chunks_from_documents from .extract_chunks_from_documents import extract_chunks_from_documents
from .check_permissions_on_dataset import check_permissions_on_dataset from .check_permissions_on_dataset import check_permissions_on_dataset

View file

@ -1,39 +0,0 @@
from cognee.shared.logging_utils import get_logger, ERROR
logger = get_logger(level=ERROR)
async def detect_language(text: str):
    """
    Detect the language of a text and return the code reported by langdetect.

    Only the first 100 characters of the text are passed to the detector.
    A detected "hr" (Croatian) is reported as "sr" (Serbian).

    Parameters:
        text (str): The text for language detection.

    Returns:
        str | None: The detected language code, or None when langdetect raises
        LangDetectException.

    Raises:
        Exception: Any other error raised during detection is logged and re-raised.
    """
    from langdetect import detect, LangDetectException

    # Only a short prefix is handed to the detector
    sample = text[:100]

    try:
        language_code = detect(sample)
    except LangDetectException as e:
        logger.error(f"Language detection error: {e}")
        return None
    except Exception as e:
        logger.error(f"Unexpected error: {e}")
        raise e

    # Special case: Croatian is mapped to Serbian
    return "sr" if language_code == "hr" else language_code

View file

@ -1,46 +0,0 @@
from cognee.shared.logging_utils import get_logger, ERROR
from cognee.exceptions import InvalidValueError
logger = get_logger(level=ERROR)
async def translate_text(
    text, source_language: str = "sr", target_language: str = "en", region_name="eu-west-1"
):
    """
    Translate text with AWS Translate, as an async generator.

    Because this is an async generator, validation and the AWS call run when
    the generator is first iterated, not when the function is called.

    Parameters:
        text (str): The text to be translated.
        source_language (str): The source language code (e.g., "sr" for Serbian).
        target_language (str): The target language code (e.g., "en" for English).
        region_name (str): AWS region name.

    Yields:
        str | None: The "TranslatedText" value of the response (or
        "No translation found." when that key is absent), or None when a
        BotoCoreError or ClientError is caught.

    Raises:
        InvalidValueError: If the text or either language code is empty.
    """
    import boto3
    from botocore.exceptions import BotoCoreError, ClientError

    if not text:
        raise InvalidValueError(message="No text to translate.")

    if not (source_language and target_language):
        raise InvalidValueError(message="Source and target language codes are required.")

    try:
        client = boto3.client(service_name="translate", region_name=region_name, use_ssl=True)
        response = client.translate_text(
            Text=text,
            SourceLanguageCode=source_language,
            TargetLanguageCode=target_language,
        )
        yield response.get("TranslatedText", "No translation found.")
    except BotoCoreError as e:
        logger.error(f"BotoCoreError occurred: {e}")
        yield None
    except ClientError as e:
        logger.error(f"ClientError occurred: {e}")
        yield None

View file

@ -1,5 +1,4 @@
import dlt import dlt
import s3fs
import json import json
import inspect import inspect
from uuid import UUID from uuid import UUID
@ -40,6 +39,8 @@ async def ingest_data(
fs = None fs = None
if s3_config.aws_access_key_id is not None and s3_config.aws_secret_access_key is not None: if s3_config.aws_access_key_id is not None and s3_config.aws_secret_access_key is not None:
import s3fs
fs = s3fs.S3FileSystem( fs = s3fs.S3FileSystem(
key=s3_config.aws_access_key_id, secret=s3_config.aws_secret_access_key, anon=False key=s3_config.aws_access_key_id, secret=s3_config.aws_secret_access_key, anon=False
) )

View file

@ -1,5 +1,4 @@
import os import os
import s3fs
from typing import List, Union, BinaryIO from typing import List, Union, BinaryIO
from urllib.parse import urlparse from urllib.parse import urlparse
from cognee.api.v1.add.config import get_s3_config from cognee.api.v1.add.config import get_s3_config
@ -27,6 +26,8 @@ async def resolve_data_directories(
fs = None fs = None
if s3_config.aws_access_key_id is not None and s3_config.aws_secret_access_key is not None: if s3_config.aws_access_key_id is not None and s3_config.aws_secret_access_key is not None:
import s3fs
fs = s3fs.S3FileSystem( fs = s3fs.S3FileSystem(
key=s3_config.aws_access_key_id, secret=s3_config.aws_secret_access_key, anon=False key=s3_config.aws_access_key_id, secret=s3_config.aws_secret_access_key, anon=False
) )

30
poetry.lock generated
View file

@ -4,9 +4,10 @@
name = "aiobotocore" name = "aiobotocore"
version = "2.22.0" version = "2.22.0"
description = "Async client for aws services using botocore and aiohttp" description = "Async client for aws services using botocore and aiohttp"
optional = false optional = true
python-versions = ">=3.8" python-versions = ">=3.8"
groups = ["main"] groups = ["main"]
markers = "extra == \"aws\""
files = [ files = [
{file = "aiobotocore-2.22.0-py3-none-any.whl", hash = "sha256:b4e6306f79df9d81daff1f9d63189a2dbee4b77ce3ab937304834e35eaaeeccf"}, {file = "aiobotocore-2.22.0-py3-none-any.whl", hash = "sha256:b4e6306f79df9d81daff1f9d63189a2dbee4b77ce3ab937304834e35eaaeeccf"},
{file = "aiobotocore-2.22.0.tar.gz", hash = "sha256:11091477266b75c2b5d28421c1f2bc9a87d175d0b8619cb830805e7a113a170b"}, {file = "aiobotocore-2.22.0.tar.gz", hash = "sha256:11091477266b75c2b5d28421c1f2bc9a87d175d0b8619cb830805e7a113a170b"},
@ -163,9 +164,10 @@ speedups = ["Brotli ; platform_python_implementation == \"CPython\"", "aiodns (>
name = "aioitertools" name = "aioitertools"
version = "0.12.0" version = "0.12.0"
description = "itertools and builtins for AsyncIO and mixed iterables" description = "itertools and builtins for AsyncIO and mixed iterables"
optional = false optional = true
python-versions = ">=3.8" python-versions = ">=3.8"
groups = ["main"] groups = ["main"]
markers = "extra == \"aws\""
files = [ files = [
{file = "aioitertools-0.12.0-py3-none-any.whl", hash = "sha256:fc1f5fac3d737354de8831cbba3eb04f79dd649d8f3afb4c5b114925e662a796"}, {file = "aioitertools-0.12.0-py3-none-any.whl", hash = "sha256:fc1f5fac3d737354de8831cbba3eb04f79dd649d8f3afb4c5b114925e662a796"},
{file = "aioitertools-0.12.0.tar.gz", hash = "sha256:c2a9055b4fbb7705f561b9d86053e8af5d10cc845d22c32008c43490b2d8dd6b"}, {file = "aioitertools-0.12.0.tar.gz", hash = "sha256:c2a9055b4fbb7705f561b9d86053e8af5d10cc845d22c32008c43490b2d8dd6b"},
@ -813,9 +815,10 @@ css = ["tinycss2 (>=1.1.0,<1.5)"]
name = "boto3" name = "boto3"
version = "1.37.3" version = "1.37.3"
description = "The AWS SDK for Python" description = "The AWS SDK for Python"
optional = false optional = true
python-versions = ">=3.8" python-versions = ">=3.8"
groups = ["main"] groups = ["main"]
markers = "extra == \"aws\""
files = [ files = [
{file = "boto3-1.37.3-py3-none-any.whl", hash = "sha256:2063b40af99fd02f6228ff52397b552ff3353831edaf8d25cc04801827ab9794"}, {file = "boto3-1.37.3-py3-none-any.whl", hash = "sha256:2063b40af99fd02f6228ff52397b552ff3353831edaf8d25cc04801827ab9794"},
{file = "boto3-1.37.3.tar.gz", hash = "sha256:21f3ce0ef111297e63a6eb998a25197b8c10982970c320d4c6e8db08be2157be"}, {file = "boto3-1.37.3.tar.gz", hash = "sha256:21f3ce0ef111297e63a6eb998a25197b8c10982970c320d4c6e8db08be2157be"},
@ -833,9 +836,10 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"]
name = "botocore" name = "botocore"
version = "1.37.3" version = "1.37.3"
description = "Low-level, data-driven core of boto 3." description = "Low-level, data-driven core of boto 3."
optional = false optional = true
python-versions = ">=3.8" python-versions = ">=3.8"
groups = ["main"] groups = ["main"]
markers = "extra == \"aws\""
files = [ files = [
{file = "botocore-1.37.3-py3-none-any.whl", hash = "sha256:d01bd3bf4c80e61fa88d636ad9f5c9f60a551d71549b481386c6b4efe0bb2b2e"}, {file = "botocore-1.37.3-py3-none-any.whl", hash = "sha256:d01bd3bf4c80e61fa88d636ad9f5c9f60a551d71549b481386c6b4efe0bb2b2e"},
{file = "botocore-1.37.3.tar.gz", hash = "sha256:fe8403eb55a88faf9b0f9da6615e5bee7be056d75e17af66c3c8f0a3b0648da4"}, {file = "botocore-1.37.3.tar.gz", hash = "sha256:fe8403eb55a88faf9b0f9da6615e5bee7be056d75e17af66c3c8f0a3b0648da4"},
@ -3889,9 +3893,10 @@ files = [
name = "jmespath" name = "jmespath"
version = "1.0.1" version = "1.0.1"
description = "JSON Matching Expressions" description = "JSON Matching Expressions"
optional = false optional = true
python-versions = ">=3.7" python-versions = ">=3.7"
groups = ["main"] groups = ["main"]
markers = "extra == \"aws\""
files = [ files = [
{file = "jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"}, {file = "jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"},
{file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"}, {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"},
@ -3962,6 +3967,8 @@ python-versions = "*"
groups = ["main"] groups = ["main"]
files = [ files = [
{file = "jsonpath-ng-1.7.0.tar.gz", hash = "sha256:f6f5f7fd4e5ff79c785f1573b394043b39849fb2bb47bcead935d12b00beab3c"}, {file = "jsonpath-ng-1.7.0.tar.gz", hash = "sha256:f6f5f7fd4e5ff79c785f1573b394043b39849fb2bb47bcead935d12b00beab3c"},
{file = "jsonpath_ng-1.7.0-py2-none-any.whl", hash = "sha256:898c93fc173f0c336784a3fa63d7434297544b7198124a68f9a3ef9597b0ae6e"},
{file = "jsonpath_ng-1.7.0-py3-none-any.whl", hash = "sha256:f3d7f9e848cba1b6da28c55b1c26ff915dc9e0b1ba7e752a53d6da8d5cbd00b6"},
] ]
[package.dependencies] [package.dependencies]
@ -5106,8 +5113,11 @@ files = [
{file = "lxml-5.4.0-cp36-cp36m-win_amd64.whl", hash = "sha256:7ce1a171ec325192c6a636b64c94418e71a1964f56d002cc28122fceff0b6121"}, {file = "lxml-5.4.0-cp36-cp36m-win_amd64.whl", hash = "sha256:7ce1a171ec325192c6a636b64c94418e71a1964f56d002cc28122fceff0b6121"},
{file = "lxml-5.4.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:795f61bcaf8770e1b37eec24edf9771b307df3af74d1d6f27d812e15a9ff3872"}, {file = "lxml-5.4.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:795f61bcaf8770e1b37eec24edf9771b307df3af74d1d6f27d812e15a9ff3872"},
{file = "lxml-5.4.0-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:29f451a4b614a7b5b6c2e043d7b64a15bd8304d7e767055e8ab68387a8cacf4e"}, {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:29f451a4b614a7b5b6c2e043d7b64a15bd8304d7e767055e8ab68387a8cacf4e"},
{file = "lxml-5.4.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:891f7f991a68d20c75cb13c5c9142b2a3f9eb161f1f12a9489c82172d1f133c0"},
{file = "lxml-5.4.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4aa412a82e460571fad592d0f93ce9935a20090029ba08eca05c614f99b0cc92"}, {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4aa412a82e460571fad592d0f93ce9935a20090029ba08eca05c614f99b0cc92"},
{file = "lxml-5.4.0-cp37-cp37m-manylinux_2_28_aarch64.whl", hash = "sha256:ac7ba71f9561cd7d7b55e1ea5511543c0282e2b6450f122672a2694621d63b7e"},
{file = "lxml-5.4.0-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:c5d32f5284012deaccd37da1e2cd42f081feaa76981f0eaa474351b68df813c5"}, {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:c5d32f5284012deaccd37da1e2cd42f081feaa76981f0eaa474351b68df813c5"},
{file = "lxml-5.4.0-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:ce31158630a6ac85bddd6b830cffd46085ff90498b397bd0a259f59d27a12188"},
{file = "lxml-5.4.0-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:31e63621e073e04697c1b2d23fcb89991790eef370ec37ce4d5d469f40924ed6"}, {file = "lxml-5.4.0-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:31e63621e073e04697c1b2d23fcb89991790eef370ec37ce4d5d469f40924ed6"},
{file = "lxml-5.4.0-cp37-cp37m-win32.whl", hash = "sha256:be2ba4c3c5b7900246a8f866580700ef0d538f2ca32535e991027bdaba944063"}, {file = "lxml-5.4.0-cp37-cp37m-win32.whl", hash = "sha256:be2ba4c3c5b7900246a8f866580700ef0d538f2ca32535e991027bdaba944063"},
{file = "lxml-5.4.0-cp37-cp37m-win_amd64.whl", hash = "sha256:09846782b1ef650b321484ad429217f5154da4d6e786636c38e434fa32e94e49"}, {file = "lxml-5.4.0-cp37-cp37m-win_amd64.whl", hash = "sha256:09846782b1ef650b321484ad429217f5154da4d6e786636c38e434fa32e94e49"},
@ -7665,6 +7675,7 @@ files = [
{file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:bb89f0a835bcfc1d42ccd5f41f04870c1b936d8507c6df12b7737febc40f0909"}, {file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:bb89f0a835bcfc1d42ccd5f41f04870c1b936d8507c6df12b7737febc40f0909"},
{file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f0c2d907a1e102526dd2986df638343388b94c33860ff3bbe1384130828714b1"}, {file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f0c2d907a1e102526dd2986df638343388b94c33860ff3bbe1384130828714b1"},
{file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f8157bed2f51db683f31306aa497311b560f2265998122abe1dce6428bd86567"}, {file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f8157bed2f51db683f31306aa497311b560f2265998122abe1dce6428bd86567"},
{file = "psycopg2_binary-2.9.10-cp313-cp313-win_amd64.whl", hash = "sha256:27422aa5f11fbcd9b18da48373eb67081243662f9b46e6fd07c3eb46e4535142"},
{file = "psycopg2_binary-2.9.10-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:eb09aa7f9cecb45027683bb55aebaaf45a0df8bf6de68801a6afdc7947bb09d4"}, {file = "psycopg2_binary-2.9.10-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:eb09aa7f9cecb45027683bb55aebaaf45a0df8bf6de68801a6afdc7947bb09d4"},
{file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b73d6d7f0ccdad7bc43e6d34273f70d587ef62f824d7261c4ae9b8b1b6af90e8"}, {file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b73d6d7f0ccdad7bc43e6d34273f70d587ef62f824d7261c4ae9b8b1b6af90e8"},
{file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ce5ab4bf46a211a8e924d307c1b1fcda82368586a19d0a24f8ae166f5c784864"}, {file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ce5ab4bf46a211a8e924d307c1b1fcda82368586a19d0a24f8ae166f5c784864"},
@ -9581,9 +9592,10 @@ files = [
name = "s3fs" name = "s3fs"
version = "2025.3.2" version = "2025.3.2"
description = "Convenient Filesystem interface over S3" description = "Convenient Filesystem interface over S3"
optional = false optional = true
python-versions = ">=3.9" python-versions = ">=3.9"
groups = ["main"] groups = ["main"]
markers = "extra == \"aws\""
files = [ files = [
{file = "s3fs-2025.3.2-py3-none-any.whl", hash = "sha256:81eae3f37b4b04bcc08845d7bcc607c6ca45878813ef7e6a28d77b2688417130"}, {file = "s3fs-2025.3.2-py3-none-any.whl", hash = "sha256:81eae3f37b4b04bcc08845d7bcc607c6ca45878813ef7e6a28d77b2688417130"},
{file = "s3fs-2025.3.2.tar.gz", hash = "sha256:6798f896ec76dd3bfd8beb89f0bb7c5263cb2760e038bae0978505cd172a307c"}, {file = "s3fs-2025.3.2.tar.gz", hash = "sha256:6798f896ec76dd3bfd8beb89f0bb7c5263cb2760e038bae0978505cd172a307c"},
@ -9605,9 +9617,10 @@ boto3 = ["aiobotocore[boto3] (>=2.5.4,<3.0.0)"]
name = "s3transfer" name = "s3transfer"
version = "0.11.3" version = "0.11.3"
description = "An Amazon S3 Transfer Manager" description = "An Amazon S3 Transfer Manager"
optional = false optional = true
python-versions = ">=3.8" python-versions = ">=3.8"
groups = ["main"] groups = ["main"]
markers = "extra == \"aws\""
files = [ files = [
{file = "s3transfer-0.11.3-py3-none-any.whl", hash = "sha256:ca855bdeb885174b5ffa95b9913622459d4ad8e331fc98eb01e6d5eb6a30655d"}, {file = "s3transfer-0.11.3-py3-none-any.whl", hash = "sha256:ca855bdeb885174b5ffa95b9913622459d4ad8e331fc98eb01e6d5eb6a30655d"},
{file = "s3transfer-0.11.3.tar.gz", hash = "sha256:edae4977e3a122445660c7c114bba949f9d191bae3b34a096f18a1c8c354527a"}, {file = "s3transfer-0.11.3.tar.gz", hash = "sha256:edae4977e3a122445660c7c114bba949f9d191bae3b34a096f18a1c8c354527a"},
@ -11993,6 +12006,7 @@ cffi = ["cffi (>=1.11)"]
[extras] [extras]
anthropic = ["anthropic"] anthropic = ["anthropic"]
api = ["gunicorn", "kuzu", "uvicorn", "websockets"] api = ["gunicorn", "kuzu", "uvicorn", "websockets"]
aws = ["s3fs"]
chromadb = ["chromadb", "pypika"] chromadb = ["chromadb", "pypika"]
codegraph = ["fastembed", "transformers", "tree-sitter", "tree-sitter-python"] codegraph = ["fastembed", "transformers", "tree-sitter", "tree-sitter-python"]
debug = ["debugpy"] debug = ["debugpy"]
@ -12023,4 +12037,4 @@ weaviate = ["weaviate-client"]
[metadata] [metadata]
lock-version = "2.1" lock-version = "2.1"
python-versions = ">=3.10,<=3.13" python-versions = ">=3.10,<=3.13"
content-hash = "ea71b85520cb437c259639de02daaeb9b4fdb78eb5ce216b28c31d2133f8e0e8" content-hash = "4d5f5cfe7072a53e4d9d38e5503a9839b555add9087b8947e5eecf0f80b9cbbb"

View file

@ -27,9 +27,6 @@ dependencies = [
"nltk==3.9.1", "nltk==3.9.1",
"numpy>=1.26.4, <=2.1", "numpy>=1.26.4, <=2.1",
"pandas>=2.2.2", "pandas>=2.2.2",
# Note: New s3fs and boto3 versions don't work well together
# Always use compatible fixed versions of these two dependencies
"s3fs[boto3]==2025.3.2",
"sqlalchemy==2.0.39", "sqlalchemy==2.0.39",
"aiosqlite>=0.20.0,<0.21", "aiosqlite>=0.20.0,<0.21",
"tiktoken<=0.9.0", "tiktoken<=0.9.0",
@ -118,6 +115,9 @@ gui = [
"qasync>=0.27.1,<0.28", "qasync>=0.27.1,<0.28",
] ]
graphiti = ["graphiti-core>=0.7.0,<0.8"] graphiti = ["graphiti-core>=0.7.0,<0.8"]
# Note: New s3fs and boto3 versions don't work well together
# Always use compatible fixed versions of these two dependencies
aws = ["s3fs[boto3]==2025.3.2"]
dev = [ dev = [
"pytest>=7.4.0,<8", "pytest>=7.4.0,<8",
"pytest-cov>=6.1.1", "pytest-cov>=6.1.1",

34
uv.lock generated
View file

@ -1,4 +1,5 @@
version = 1 version = 1
revision = 1
requires-python = ">=3.10, <=3.13" requires-python = ">=3.10, <=3.13"
resolution-markers = [ resolution-markers = [
"python_full_version >= '3.13'", "python_full_version >= '3.13'",
@ -57,7 +58,6 @@ dependencies = [
{ name = "aiohappyeyeballs" }, { name = "aiohappyeyeballs" },
{ name = "aiosignal" }, { name = "aiosignal" },
{ name = "async-timeout", version = "4.0.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, { name = "async-timeout", version = "4.0.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
{ name = "async-timeout", version = "5.0.1", source = { registry = "https://pypi.org/simple" }, marker = "python_version < '0'" },
{ name = "attrs" }, { name = "attrs" },
{ name = "frozenlist" }, { name = "frozenlist" },
{ name = "multidict" }, { name = "multidict" },
@ -345,7 +345,6 @@ version = "0.30.0"
source = { registry = "https://pypi.org/simple" } source = { registry = "https://pypi.org/simple" }
dependencies = [ dependencies = [
{ name = "async-timeout", version = "4.0.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, { name = "async-timeout", version = "4.0.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
{ name = "async-timeout", version = "5.0.1", source = { registry = "https://pypi.org/simple" }, marker = "python_version < '0'" },
] ]
sdist = { url = "https://files.pythonhosted.org/packages/2f/4c/7c991e080e106d854809030d8584e15b2e996e26f16aee6d757e387bc17d/asyncpg-0.30.0.tar.gz", hash = "sha256:c551e9928ab6707602f44811817f82ba3c446e018bfe1d3abecc8ba5f3eac851", size = 957746 } sdist = { url = "https://files.pythonhosted.org/packages/2f/4c/7c991e080e106d854809030d8584e15b2e996e26f16aee6d757e387bc17d/asyncpg-0.30.0.tar.gz", hash = "sha256:c551e9928ab6707602f44811817f82ba3c446e018bfe1d3abecc8ba5f3eac851", size = 957746 }
wheels = [ wheels = [
@ -902,7 +901,6 @@ dependencies = [
{ name = "pypdf" }, { name = "pypdf" },
{ name = "python-dotenv" }, { name = "python-dotenv" },
{ name = "python-multipart" }, { name = "python-multipart" },
{ name = "s3fs", extra = ["boto3"] },
{ name = "scikit-learn" }, { name = "scikit-learn" },
{ name = "sentry-sdk", extra = ["fastapi"] }, { name = "sentry-sdk", extra = ["fastapi"] },
{ name = "sqlalchemy" }, { name = "sqlalchemy" },
@ -921,6 +919,9 @@ api = [
{ name = "uvicorn" }, { name = "uvicorn" },
{ name = "websockets" }, { name = "websockets" },
] ]
aws = [
{ name = "s3fs", extra = ["boto3"] },
]
chromadb = [ chromadb = [
{ name = "chromadb" }, { name = "chromadb" },
{ name = "pypika" }, { name = "pypika" },
@ -1103,7 +1104,7 @@ requires-dist = [
{ name = "qasync", marker = "extra == 'gui'", specifier = ">=0.27.1,<0.28" }, { name = "qasync", marker = "extra == 'gui'", specifier = ">=0.27.1,<0.28" },
{ name = "qdrant-client", marker = "extra == 'qdrant'", specifier = ">=1.14.2,<2" }, { name = "qdrant-client", marker = "extra == 'qdrant'", specifier = ">=1.14.2,<2" },
{ name = "ruff", marker = "extra == 'dev'", specifier = ">=0.9.2,<1.0.0" }, { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.9.2,<1.0.0" },
{ name = "s3fs", extras = ["boto3"], specifier = "==2025.3.2" }, { name = "s3fs", extras = ["boto3"], marker = "extra == 'aws'", specifier = "==2025.3.2" },
{ name = "scikit-learn", specifier = ">=1.6.1,<2" }, { name = "scikit-learn", specifier = ">=1.6.1,<2" },
{ name = "sentry-sdk", extras = ["fastapi"], specifier = ">=2.9.0,<3" }, { name = "sentry-sdk", extras = ["fastapi"], specifier = ">=2.9.0,<3" },
{ name = "sqlalchemy", specifier = "==2.0.39" }, { name = "sqlalchemy", specifier = "==2.0.39" },
@ -1121,6 +1122,7 @@ requires-dist = [
{ name = "weaviate-client", marker = "extra == 'weaviate'", specifier = "==4.9.6" }, { name = "weaviate-client", marker = "extra == 'weaviate'", specifier = "==4.9.6" },
{ name = "websockets", marker = "extra == 'api'", specifier = ">=15.0.1" }, { name = "websockets", marker = "extra == 'api'", specifier = ">=15.0.1" },
] ]
provides-extras = ["api", "weaviate", "qdrant", "neo4j", "postgres", "postgres-binary", "notebook", "langchain", "llama-index", "gemini", "huggingface", "ollama", "mistral", "anthropic", "deepeval", "posthog", "falkordb", "kuzu", "groq", "milvus", "chromadb", "docs", "codegraph", "evals", "gui", "graphiti", "aws", "dev", "debug"]
[[package]] [[package]]
name = "colorama" name = "colorama"
@ -1784,17 +1786,17 @@ name = "fastembed"
version = "0.6.0" version = "0.6.0"
source = { registry = "https://pypi.org/simple" } source = { registry = "https://pypi.org/simple" }
dependencies = [ dependencies = [
{ name = "huggingface-hub" }, { name = "huggingface-hub", marker = "python_full_version < '3.13'" },
{ name = "loguru" }, { name = "loguru", marker = "python_full_version < '3.13'" },
{ name = "mmh3" }, { name = "mmh3", marker = "python_full_version < '3.13'" },
{ name = "numpy", version = "1.26.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" }, { name = "numpy", version = "1.26.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" },
{ name = "numpy", version = "2.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, { name = "numpy", version = "2.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.12.*'" },
{ name = "onnxruntime" }, { name = "onnxruntime", marker = "python_full_version < '3.13'" },
{ name = "pillow" }, { name = "pillow", marker = "python_full_version < '3.13'" },
{ name = "py-rust-stemmers" }, { name = "py-rust-stemmers", marker = "python_full_version < '3.13'" },
{ name = "requests" }, { name = "requests", marker = "python_full_version < '3.13'" },
{ name = "tokenizers" }, { name = "tokenizers", marker = "python_full_version < '3.13'" },
{ name = "tqdm" }, { name = "tqdm", marker = "python_full_version < '3.13'" },
] ]
sdist = { url = "https://files.pythonhosted.org/packages/c6/f4/036a656c605f63dc25f11284f60f69900a54a19c513e1ae60d21d6977e75/fastembed-0.6.0.tar.gz", hash = "sha256:5c9ead25f23449535b07243bbe1f370b820dcc77ec2931e61674e3fe7ff24733", size = 50731 } sdist = { url = "https://files.pythonhosted.org/packages/c6/f4/036a656c605f63dc25f11284f60f69900a54a19c513e1ae60d21d6977e75/fastembed-0.6.0.tar.gz", hash = "sha256:5c9ead25f23449535b07243bbe1f370b820dcc77ec2931e61674e3fe7ff24733", size = 50731 }
wheels = [ wheels = [
@ -3765,8 +3767,8 @@ name = "loguru"
version = "0.7.3" version = "0.7.3"
source = { registry = "https://pypi.org/simple" } source = { registry = "https://pypi.org/simple" }
dependencies = [ dependencies = [
{ name = "colorama", marker = "sys_platform == 'win32'" }, { name = "colorama", marker = "python_full_version < '3.13' and sys_platform == 'win32'" },
{ name = "win32-setctime", marker = "sys_platform == 'win32'" }, { name = "win32-setctime", marker = "python_full_version < '3.13' and sys_platform == 'win32'" },
] ]
sdist = { url = "https://files.pythonhosted.org/packages/3a/05/a1dae3dffd1116099471c643b8924f5aa6524411dc6c63fdae648c4f1aca/loguru-0.7.3.tar.gz", hash = "sha256:19480589e77d47b8d85b2c827ad95d49bf31b0dcde16593892eb51dd18706eb6", size = 63559 } sdist = { url = "https://files.pythonhosted.org/packages/3a/05/a1dae3dffd1116099471c643b8924f5aa6524411dc6c63fdae648c4f1aca/loguru-0.7.3.tar.gz", hash = "sha256:19480589e77d47b8d85b2c827ad95d49bf31b0dcde16593892eb51dd18706eb6", size = 63559 }
wheels = [ wheels = [