refactor: update env template

This commit is contained in:
Igor Ilic 2025-10-15 14:43:21 +02:00
parent 1b28f13743
commit fc4440da8c
2 changed files with 4 additions and 11 deletions

View file

@ -28,11 +28,10 @@ EMBEDDING_ENDPOINT=""
EMBEDDING_API_VERSION=""
EMBEDDING_DIMENSIONS=3072
EMBEDDING_MAX_TOKENS=8191
EMBEDDING_BATCH_SIZE=36
# If embedding key is not provided same key set for LLM_API_KEY will be used
#EMBEDDING_API_KEY="your_api_key"
# Note: OpenAI support up to 2048 elements and Gemini supports a maximum of 100 elements in an embedding batch,
# Cognee sets the optimal batch size for OpenAI and Gemini, but a custom size can be defined if necessary for other models
#EMBEDDING_BATCH_SIZE=2048
# If using BAML structured output these env variables will be used
BAML_LLM_PROVIDER=openai

View file

@ -14,14 +14,6 @@ from cognee.infrastructure.loaders.external.pypdf_loader import PyPdfLoader
logger = get_logger(__name__)
try:
from unstructured.partition.pdf import partition_pdf
except ImportError as e:
logger.info(
"unstructured[pdf] not installed, can't use AdvancedPdfLoader, will use PyPdfLoader instead."
)
raise ImportError from e
@dataclass
class _PageBuffer:
@ -88,6 +80,8 @@ class AdvancedPdfLoader(LoaderInterface):
**kwargs,
}
# Use partition to extract elements
from unstructured.partition.pdf import partition_pdf
elements = partition_pdf(**partition_kwargs)
# Process elements into text content