addressed issues

This commit is contained in:
vasilije 2025-09-07 15:56:11 -07:00
parent dd3049bf03
commit aa445cfaa7
7 changed files with 11 additions and 109 deletions

View file

@@ -16,7 +16,7 @@ metrics_volume = modal.Volume.from_name("evaluation_dashboard_results", create_i
image = (
modal.Image.debian_slim(python_version="3.11")
.pip_install("streamlit", "plotly")
.pip_install("streamlit", "pandas", "plotly")
.add_local_file(__file__, "/root/serve_dashboard.py")
)

View file

@@ -1,45 +0,0 @@
from cognee.infrastructure.data.exceptions.exceptions import KeywordExtractionError
# def extract_keywords(text: str) -> list[str]:
# """
# Extract keywords from the provided text string.
# This function raises a KeywordExtractionError if the input text is empty. It processes the
# text to extract parts of speech, focusing on nouns, and uses TF-IDF to identify the most
# relevant keywords based on their frequency. The function returns a list of up to 15
# keywords, each having more than 3 characters.
# Parameters:
# -----------
# - text (str): The input text from which to extract keywords.
# Returns:
# --------
# - list[str]: A list of keywords extracted from the text, containing up to 15 nouns
# with more than 3 characters.
# """
# if len(text) == 0:
# raise KeywordExtractionError()
# tags = extract_pos_tags(text)
# nouns = [word for (word, tag) in tags if tag == "NN"]
# vectorizer = TfidfVectorizer()
# tfidf = vectorizer.fit_transform(nouns)
# top_nouns = sorted(
# vectorizer.vocabulary_, key=lambda x: tfidf[0, vectorizer.vocabulary_[x]], reverse=True
# )
# keywords = []
# for word in top_nouns:
# if len(word) > 3:
# keywords.append(word)
# if len(keywords) >= 15:
# break
# return keywords

View file

@@ -75,6 +75,13 @@ class LLMConfig(BaseSettings):
def model_post_init(self, __context) -> None:
"""Initialize the BAML registry after the model is created."""
# Check if BAML is selected as structured output framework but not available
if self.structured_output_framework == "baml" and ClientRegistry is None:
raise ImportError(
"BAML is selected as structured output framework but not available. "
"Please install with 'pip install cognee[baml]' to use BAML extraction features."
)
if ClientRegistry is not None:
self.baml_registry = ClientRegistry()
self.baml_registry.add_llm_client(

View file

@@ -37,11 +37,6 @@ async def extract_summary(content: str, response_model: Type[BaseModel]):
"""
config = get_llm_config()
if config.baml_registry is None:
raise ImportError(
"BAML is not available. Please install with 'pip install cognee[baml]' to use BAML extraction features."
)
# Use BAML's SummarizeContent function
summary_result = await b.SummarizeContent(
content, baml_options={"client_registry": config.baml_registry}
@@ -82,11 +77,6 @@ async def extract_code_summary(content: str):
try:
config = get_llm_config()
if config.baml_registry is None:
raise ImportError(
"BAML is not available. Please install with 'pip install cognee[baml]' to use BAML extraction features."
)
result = await b.SummarizeCode(
content, baml_options={"client_registry": config.baml_registry}
)

View file

@@ -16,11 +16,6 @@ async def extract_content_graph(
get_logger(level="INFO")
if config.baml_registry is None:
raise ImportError(
"BAML is not available. Please install with 'pip install cognee[baml]' to use BAML extraction features."
)
# if response_model:
# # tb = TypeBuilder()
# # country = tb.union \

View file

@@ -17,46 +17,6 @@ from cognee.infrastructure.databases.graph import get_graph_engine
proxy_url = "https://test.prometh.ai"
def get_entities(tagged_tokens):
try:
import nltk
nltk.download("maxent_ne_chunker", quiet=True)
from nltk.chunk import ne_chunk
return ne_chunk(tagged_tokens)
except ImportError:
raise ImportError(
"NLTK is required for entity extraction. Install with 'pip install cognee[nlp]' to use this feature."
)
def extract_pos_tags(sentence):
"""Extract Part-of-Speech (POS) tags for words in a sentence."""
try:
import nltk
# Ensure that the necessary NLTK resources are downloaded
nltk.download("words", quiet=True)
nltk.download("punkt", quiet=True)
nltk.download("averaged_perceptron_tagger", quiet=True)
from nltk.tag import pos_tag
from nltk.tokenize import word_tokenize
# Tokenize the sentence into words
tokens = word_tokenize(sentence)
# Tag each word with its corresponding POS tag
pos_tags = pos_tag(tokens)
return pos_tags
except ImportError:
raise ImportError(
"NLTK is required for POS tagging. Install with 'pip install cognee[nlp]' to use this feature."
)
def get_anonymous_id():
"""Creates or reads a anonymous user id"""
tracking_id = os.getenv("TRACKING_ID", None)

View file

@@ -49,7 +49,8 @@ dependencies = [
"python-magic-bin<0.5 ; platform_system == 'Windows'", # Only needed for Windows
"fastembed<=0.6.0 ",
"networkx>=3.4.2,<4",
"matplotlib>=3.8.3,<4"
"matplotlib>=3.8.3,<4",
"baml-py>=0.201.0"
]
@@ -66,18 +67,12 @@ distributed = [
# Database backends
neo4j = ["neo4j>=5.28.0,<6"]
neptune = ["langchain_aws>=0.2.22"]
# PostgreSQL support (binary - no compilation required)
postgres = [
"psycopg2>=2.9.10,<3",
"psycopg2-binary>=2.9.10,<3.0.0", # Pre-compiled binary, no PostgreSQL headers needed
"pgvector>=0.3.5,<0.4",
"asyncpg>=0.30.0,<1.0.0",
]
# PostgreSQL support (source - requires PostgreSQL development headers)
postgres-source = [
"psycopg2>=2.9.10,<3 ; platform_system != 'Windows'", # Requires libpq-dev, build tools
"pgvector>=0.3.5,<0.4",
"asyncpg>=0.30.0,<1.0.0",
]
notebook = ["notebook>=7.1.0,<8"]
langchain = [
"langsmith>=0.2.3,<1.0.0",