addressed issues

This commit is contained in:
vasilije 2025-09-07 15:56:11 -07:00
parent dd3049bf03
commit aa445cfaa7
7 changed files with 11 additions and 109 deletions

View file

@@ -16,7 +16,7 @@ metrics_volume = modal.Volume.from_name("evaluation_dashboard_results", create_i
image = (
modal.Image.debian_slim(python_version="3.11")
.pip_install("streamlit", "plotly")
.pip_install("streamlit", "pandas", "plotly")
.add_local_file(__file__, "/root/serve_dashboard.py")
)

View file

@@ -1,45 +0,0 @@
from cognee.infrastructure.data.exceptions.exceptions import KeywordExtractionError
# def extract_keywords(text: str) -> list[str]:
# """
# Extract keywords from the provided text string.
# This function raises a KeywordExtractionError if the input text is empty. It processes the
# text to extract parts of speech, focusing on nouns, and uses TF-IDF to identify the most
# relevant keywords based on their frequency. The function returns a list of up to 15
# keywords, each having more than 3 characters.
# Parameters:
# -----------
# - text (str): The input text from which to extract keywords.
# Returns:
# --------
# - list[str]: A list of keywords extracted from the text, containing up to 15 nouns
# with more than 3 characters.
# """
# if len(text) == 0:
# raise KeywordExtractionError()
# tags = extract_pos_tags(text)
# nouns = [word for (word, tag) in tags if tag == "NN"]
# vectorizer = TfidfVectorizer()
# tfidf = vectorizer.fit_transform(nouns)
# top_nouns = sorted(
# vectorizer.vocabulary_, key=lambda x: tfidf[0, vectorizer.vocabulary_[x]], reverse=True
# )
# keywords = []
# for word in top_nouns:
# if len(word) > 3:
# keywords.append(word)
# if len(keywords) >= 15:
# break
# return keywords

View file

@@ -75,6 +75,13 @@ class LLMConfig(BaseSettings):
def model_post_init(self, __context) -> None:
"""Initialize the BAML registry after the model is created."""
# Check if BAML is selected as structured output framework but not available
if self.structured_output_framework == "baml" and ClientRegistry is None:
raise ImportError(
"BAML is selected as structured output framework but not available. "
"Please install with 'pip install cognee[baml]' to use BAML extraction features."
)
if ClientRegistry is not None:
self.baml_registry = ClientRegistry()
self.baml_registry.add_llm_client(

View file

@@ -37,11 +37,6 @@ async def extract_summary(content: str, response_model: Type[BaseModel]):
"""
config = get_llm_config()
if config.baml_registry is None:
raise ImportError(
"BAML is not available. Please install with 'pip install cognee[baml]' to use BAML extraction features."
)
# Use BAML's SummarizeContent function
summary_result = await b.SummarizeContent(
content, baml_options={"client_registry": config.baml_registry}
@@ -82,11 +77,6 @@ async def extract_code_summary(content: str):
try:
config = get_llm_config()
if config.baml_registry is None:
raise ImportError(
"BAML is not available. Please install with 'pip install cognee[baml]' to use BAML extraction features."
)
result = await b.SummarizeCode(
content, baml_options={"client_registry": config.baml_registry}
)

View file

@@ -16,11 +16,6 @@ async def extract_content_graph(
get_logger(level="INFO")
if config.baml_registry is None:
raise ImportError(
"BAML is not available. Please install with 'pip install cognee[baml]' to use BAML extraction features."
)
# if response_model:
# # tb = TypeBuilder()
# # country = tb.union \

View file

@@ -17,46 +17,6 @@ from cognee.infrastructure.databases.graph import get_graph_engine
proxy_url = "https://test.prometh.ai"
def get_entities(tagged_tokens):
try:
import nltk
nltk.download("maxent_ne_chunker", quiet=True)
from nltk.chunk import ne_chunk
return ne_chunk(tagged_tokens)
except ImportError:
raise ImportError(
"NLTK is required for entity extraction. Install with 'pip install cognee[nlp]' to use this feature."
)
def extract_pos_tags(sentence):
"""Extract Part-of-Speech (POS) tags for words in a sentence."""
try:
import nltk
# Ensure that the necessary NLTK resources are downloaded
nltk.download("words", quiet=True)
nltk.download("punkt", quiet=True)
nltk.download("averaged_perceptron_tagger", quiet=True)
from nltk.tag import pos_tag
from nltk.tokenize import word_tokenize
# Tokenize the sentence into words
tokens = word_tokenize(sentence)
# Tag each word with its corresponding POS tag
pos_tags = pos_tag(tokens)
return pos_tags
except ImportError:
raise ImportError(
"NLTK is required for POS tagging. Install with 'pip install cognee[nlp]' to use this feature."
)
def get_anonymous_id():
"""Creates or reads a anonymous user id"""
tracking_id = os.getenv("TRACKING_ID", None)

View file

@@ -49,7 +49,8 @@ dependencies = [
"python-magic-bin<0.5 ; platform_system == 'Windows'", # Only needed for Windows
"fastembed<=0.6.0 ",
"networkx>=3.4.2,<4",
"matplotlib>=3.8.3,<4"
"matplotlib>=3.8.3,<4",
"baml-py>=0.201.0"
]
@@ -66,18 +67,12 @@ distributed = [
# Database backends
neo4j = ["neo4j>=5.28.0,<6"]
neptune = ["langchain_aws>=0.2.22"]
# PostgreSQL support (binary - no compilation required)
postgres = [
"psycopg2>=2.9.10,<3",
"psycopg2-binary>=2.9.10,<3.0.0", # Pre-compiled binary, no PostgreSQL headers needed
"pgvector>=0.3.5,<0.4",
"asyncpg>=0.30.0,<1.0.0",
]
# PostgreSQL support (source - requires PostgreSQL development headers)
postgres-source = [
"psycopg2>=2.9.10,<3 ; platform_system != 'Windows'", # Requires libpq-dev, build tools
"pgvector>=0.3.5,<0.4",
"asyncpg>=0.30.0,<1.0.0",
]
notebook = ["notebook>=7.1.0,<8"]
langchain = [
"langsmith>=0.2.3,<1.0.0",