Added fixes for NLTK: download the required NLTK data automatically
This commit is contained in:
parent
9110a2b59b
commit
3429af32c2
2 changed files with 41 additions and 0 deletions
|
|
@ -71,6 +71,25 @@ async def cognee_pipeline(
|
|||
if cognee_pipeline.first_run:
|
||||
from cognee.infrastructure.llm.utils import test_llm_connection, test_embedding_connection
|
||||
|
||||
# Ensure NLTK data is downloaded on first run
|
||||
def ensure_nltk_data():
    """Fetch the NLTK data packages the system relies on, best-effort.

    Every package is requested through ``nltk.download``. That call reports
    a failed download by returning ``False`` instead of raising, so the
    return values are checked: success is logged only when every package
    was obtained, otherwise a warning naming the failed packages is logged.

    Exceptions raised while importing ``nltk`` or while downloading
    (including ``ImportError`` when ``nltk`` is not installed) are logged as
    a warning and swallowed, so the caller can continue without NLTK data.
    """
    # Essential NLTK data used by the system.
    required_packages = (
        "punkt_tab",
        "punkt",
        "averaged_perceptron_tagger",
        "averaged_perceptron_tagger_eng",
        "maxent_ne_chunker",
        "words",
    )

    try:
        import nltk

        # Attempt every package even when an earlier one fails, and remember
        # the ones whose download reported failure (falsy return value).
        failed_packages = [
            package
            for package in required_packages
            if not nltk.download(package, quiet=True)
        ]
    except Exception as e:
        logger.warning(f"Failed to initialize NLTK data: {e}")
        return

    if failed_packages:
        # Do not claim success when some packages could not be fetched.
        logger.warning(
            "Failed to initialize NLTK data: could not download "
            f"{', '.join(failed_packages)}"
        )
    else:
        logger.info("NLTK data initialized successfully")
|
||||
|
||||
ensure_nltk_data()
|
||||
|
||||
# Test LLM and Embedding configuration once before running Cognee
|
||||
await test_llm_connection()
|
||||
await test_embedding_connection()
|
||||
|
|
|
|||
|
|
@ -54,6 +54,28 @@ async def plugin_ingest_data(
|
|||
if not user:
|
||||
user = await get_default_user()
|
||||
|
||||
# Ensure NLTK data is downloaded (preserves automatic download behavior)
|
||||
def ensure_nltk_data():
    """Fetch the NLTK data packages the system relies on, best-effort.

    Every package is requested through ``nltk.download``. That call reports
    a failed download by returning ``False`` instead of raising, so the
    return values are checked: success is logged only when every package
    was obtained, otherwise a warning naming the failed packages is logged.

    Exceptions raised while importing ``nltk`` or while downloading
    (including ``ImportError`` when ``nltk`` is not installed) are logged as
    a warning and swallowed, so ingestion can continue without NLTK data.
    """
    # Essential NLTK data used by the system.
    required_packages = (
        "punkt_tab",
        "punkt",
        "averaged_perceptron_tagger",
        "averaged_perceptron_tagger_eng",
        "maxent_ne_chunker",
        "words",
    )

    try:
        import nltk

        # Attempt every package even when an earlier one fails, and remember
        # the ones whose download reported failure (falsy return value).
        failed_packages = [
            package
            for package in required_packages
            if not nltk.download(package, quiet=True)
        ]
    except Exception as e:
        logger.warning(f"Failed to download NLTK data: {e}")
        return

    if failed_packages:
        # Do not claim success when some packages could not be fetched.
        logger.warning(
            "Failed to download NLTK data: could not download "
            f"{', '.join(failed_packages)}"
        )
    else:
        logger.info("NLTK data verified/downloaded successfully")
|
||||
|
||||
# Download NLTK data once per session
|
||||
if not hasattr(plugin_ingest_data, "_nltk_initialized"):
|
||||
ensure_nltk_data()
|
||||
plugin_ingest_data._nltk_initialized = True
|
||||
|
||||
# Initialize S3 support (maintain existing behavior)
|
||||
s3_config = get_s3_config()
|
||||
fs = None
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue