From fc4440da8c7b7cdfd4087f34c40ac90cc86bb839 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 15 Oct 2025 14:43:21 +0200 Subject: [PATCH] refactor: update env template --- .env.template | 5 ++--- .../loaders/external/advanced_pdf_loader.py | 10 ++-------- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/.env.template b/.env.template index 7fd3ba9e8..3137636d3 100644 --- a/.env.template +++ b/.env.template @@ -28,11 +28,10 @@ EMBEDDING_ENDPOINT="" EMBEDDING_API_VERSION="" EMBEDDING_DIMENSIONS=3072 EMBEDDING_MAX_TOKENS=8191 +EMBEDDING_BATCH_SIZE=36 # If embedding key is not provided same key set for LLM_API_KEY will be used #EMBEDDING_API_KEY="your_api_key" -# Note: OpenAI support up to 2048 elements and Gemini supports a maximum of 100 elements in an embedding batch, -# Cognee sets the optimal batch size for OpenAI and Gemini, but a custom size can be defined if necessary for other models -#EMBEDDING_BATCH_SIZE=2048 + # If using BAML structured output these env variables will be used BAML_LLM_PROVIDER=openai diff --git a/cognee/infrastructure/loaders/external/advanced_pdf_loader.py b/cognee/infrastructure/loaders/external/advanced_pdf_loader.py index 7bab8cac6..6d1412b77 100644 --- a/cognee/infrastructure/loaders/external/advanced_pdf_loader.py +++ b/cognee/infrastructure/loaders/external/advanced_pdf_loader.py @@ -14,14 +14,6 @@ from cognee.infrastructure.loaders.external.pypdf_loader import PyPdfLoader logger = get_logger(__name__) -try: - from unstructured.partition.pdf import partition_pdf -except ImportError as e: - logger.info( - "unstructured[pdf] not installed, can't use AdvancedPdfLoader, will use PyPdfLoader instead." - ) - raise ImportError from e - @dataclass class _PageBuffer: @@ -88,6 +80,8 @@ class AdvancedPdfLoader(LoaderInterface): **kwargs, } # Use partition to extract elements + from unstructured.partition.pdf import partition_pdf + elements = partition_pdf(**partition_kwargs) # Process elements into text content