added fixes for nltk
This commit is contained in:
parent
9110a2b59b
commit
3429af32c2
2 changed files with 41 additions and 0 deletions
|
|
@ -71,6 +71,25 @@ async def cognee_pipeline(
|
||||||
if cognee_pipeline.first_run:
|
if cognee_pipeline.first_run:
|
||||||
from cognee.infrastructure.llm.utils import test_llm_connection, test_embedding_connection
|
from cognee.infrastructure.llm.utils import test_llm_connection, test_embedding_connection
|
||||||
|
|
||||||
|
# Ensure NLTK data is downloaded on first run
|
||||||
|
def ensure_nltk_data():
    """Fetch the NLTK resources used by the system, on a best-effort basis.

    Every resource in the list is requested through ``nltk.download``.
    Nothing is raised to the caller: a missing ``nltk`` installation, a
    network error, or a resource that could not be fetched is reported
    through ``logger.warning`` only.
    """
    required_packages = (
        "punkt_tab",
        "punkt",
        "averaged_perceptron_tagger",
        "averaged_perceptron_tagger_eng",
        "maxent_ne_chunker",
        "words",
    )

    try:
        # Imported lazily so that an absent nltk install is downgraded to a
        # warning instead of breaking the surrounding pipeline.
        import nltk

        # nltk.download() reports a failed fetch by returning False rather
        # than raising, so the results must be inspected explicitly;
        # otherwise a failed download would be logged as a success.
        failed_packages = [
            package
            for package in required_packages
            if not nltk.download(package, quiet=True)
        ]
    except Exception as e:
        logger.warning(f"Failed to initialize NLTK data: {e}")
        return

    if failed_packages:
        logger.warning(
            f"Failed to initialize NLTK data: could not download {', '.join(failed_packages)}"
        )
    else:
        logger.info("NLTK data initialized successfully")
|
||||||
|
|
||||||
|
ensure_nltk_data()
|
||||||
|
|
||||||
# Test LLM and Embedding configuration once before running Cognee
|
# Test LLM and Embedding configuration once before running Cognee
|
||||||
await test_llm_connection()
|
await test_llm_connection()
|
||||||
await test_embedding_connection()
|
await test_embedding_connection()
|
||||||
|
|
|
||||||
|
|
@ -54,6 +54,28 @@ async def plugin_ingest_data(
|
||||||
if not user:
|
if not user:
|
||||||
user = await get_default_user()
|
user = await get_default_user()
|
||||||
|
|
||||||
|
# Ensure NLTK data is downloaded (preserves automatic download behavior)
|
||||||
|
def ensure_nltk_data():
    """Fetch the NLTK resources used by the system, on a best-effort basis.

    Every resource in the list is requested through ``nltk.download``.
    Nothing is raised to the caller: a missing ``nltk`` installation, a
    network error, or a resource that could not be fetched is reported
    through ``logger.warning`` only.
    """
    required_packages = (
        "punkt_tab",
        "punkt",
        "averaged_perceptron_tagger",
        "averaged_perceptron_tagger_eng",
        "maxent_ne_chunker",
        "words",
    )

    try:
        # Imported lazily so that an absent nltk install is downgraded to a
        # warning instead of aborting ingestion.
        import nltk

        # nltk.download() reports a failed fetch by returning False rather
        # than raising, so the results must be inspected explicitly;
        # otherwise a failed download would be logged as a success.
        failed_packages = [
            package
            for package in required_packages
            if not nltk.download(package, quiet=True)
        ]
    except Exception as e:
        logger.warning(f"Failed to download NLTK data: {e}")
        return

    if failed_packages:
        logger.warning(
            f"Failed to download NLTK data: could not download {', '.join(failed_packages)}"
        )
    else:
        logger.info("NLTK data verified/downloaded successfully")
|
||||||
|
|
||||||
|
# Download NLTK data once per session
|
||||||
|
if not hasattr(plugin_ingest_data, "_nltk_initialized"):
|
||||||
|
ensure_nltk_data()
|
||||||
|
plugin_ingest_data._nltk_initialized = True
|
||||||
|
|
||||||
# Initialize S3 support (maintain existing behavior)
|
# Initialize S3 support (maintain existing behavior)
|
||||||
s3_config = get_s3_config()
|
s3_config = get_s3_config()
|
||||||
fs = None
|
fs = None
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue