refactor: update env template
This commit is contained in:
parent
1b28f13743
commit
fc4440da8c
2 changed files with 4 additions and 11 deletions
|
|
@ -28,11 +28,10 @@ EMBEDDING_ENDPOINT=""
|
||||||
EMBEDDING_API_VERSION=""
|
EMBEDDING_API_VERSION=""
|
||||||
EMBEDDING_DIMENSIONS=3072
|
EMBEDDING_DIMENSIONS=3072
|
||||||
EMBEDDING_MAX_TOKENS=8191
|
EMBEDDING_MAX_TOKENS=8191
|
||||||
|
EMBEDDING_BATCH_SIZE=36
|
||||||
# If an embedding key is not provided, the key set for LLM_API_KEY will be used
|
# If an embedding key is not provided, the key set for LLM_API_KEY will be used
|
||||||
#EMBEDDING_API_KEY="your_api_key"
|
#EMBEDDING_API_KEY="your_api_key"
|
||||||
# Note: OpenAI supports up to 2048 elements and Gemini supports a maximum of 100 elements in an embedding batch,
|
|
||||||
# Cognee sets the optimal batch size for OpenAI and Gemini, but a custom size can be defined if necessary for other models
|
|
||||||
#EMBEDDING_BATCH_SIZE=2048
|
|
||||||
|
|
||||||
# If using BAML structured output these env variables will be used
|
# If using BAML structured output these env variables will be used
|
||||||
BAML_LLM_PROVIDER=openai
|
BAML_LLM_PROVIDER=openai
|
||||||
|
|
|
||||||
|
|
@ -14,14 +14,6 @@ from cognee.infrastructure.loaders.external.pypdf_loader import PyPdfLoader
|
||||||
|
|
||||||
logger = get_logger(__name__)
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
try:
|
|
||||||
from unstructured.partition.pdf import partition_pdf
|
|
||||||
except ImportError as e:
|
|
||||||
logger.info(
|
|
||||||
"unstructured[pdf] not installed, can't use AdvancedPdfLoader, will use PyPdfLoader instead."
|
|
||||||
)
|
|
||||||
raise ImportError from e
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class _PageBuffer:
|
class _PageBuffer:
|
||||||
|
|
@ -88,6 +80,8 @@ class AdvancedPdfLoader(LoaderInterface):
|
||||||
**kwargs,
|
**kwargs,
|
||||||
}
|
}
|
||||||
# Use partition to extract elements
|
# Use partition to extract elements
|
||||||
|
from unstructured.partition.pdf import partition_pdf
|
||||||
|
|
||||||
elements = partition_pdf(**partition_kwargs)
|
elements = partition_pdf(**partition_kwargs)
|
||||||
|
|
||||||
# Process elements into text content
|
# Process elements into text content
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue