addressed issues

This commit is contained in:
parent dd3049bf03
commit aa445cfaa7

7 changed files with 11 additions and 109 deletions

@@ -16,7 +16,7 @@ metrics_volume = modal.Volume.from_name("evaluation_dashboard_results", create_i
 
 image = (
     modal.Image.debian_slim(python_version="3.11")
-    .pip_install("streamlit", "plotly")
+    .pip_install("streamlit", "pandas", "plotly")
     .add_local_file(__file__, "/root/serve_dashboard.py")
 )
 
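For context on the hunk above: the dashboard image now bundles pandas alongside streamlit and plotly. A hedged sketch of how an image like this is typically attached to a Modal web endpoint that serves the copied Streamlit script; the app name, port, and serve function below are illustrative assumptions, not part of this commit:

import subprocess

import modal

app = modal.App("evaluation-dashboard")  # app name assumed for illustration

image = (
    modal.Image.debian_slim(python_version="3.11")
    .pip_install("streamlit", "pandas", "plotly")
    .add_local_file(__file__, "/root/serve_dashboard.py")
)

@app.function(image=image)
@modal.web_server(8000)
def serve():
    # Launch Streamlit against the copied script; Modal proxies port 8000.
    subprocess.Popen(
        ["streamlit", "run", "/root/serve_dashboard.py", "--server.port", "8000"]
    )
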
@@ -1,45 +0,0 @@
-from cognee.infrastructure.data.exceptions.exceptions import KeywordExtractionError
-
-
-# def extract_keywords(text: str) -> list[str]:
-#     """
-#     Extract keywords from the provided text string.
-
-#     This function raises a KeywordExtractionError if the input text is empty. It processes the
-#     text to extract parts of speech, focusing on nouns, and uses TF-IDF to identify the most
-#     relevant keywords based on their frequency. The function returns a list of up to 15
-#     keywords, each having more than 3 characters.
-
-#     Parameters:
-#     -----------
-
-#     - text (str): The input text from which to extract keywords.
-
-#     Returns:
-#     --------
-
-#     - list[str]: A list of keywords extracted from the text, containing up to 15 nouns
-#     with more than 3 characters.
-#     """
-#     if len(text) == 0:
-#         raise KeywordExtractionError()
-
-#     tags = extract_pos_tags(text)
-#     nouns = [word for (word, tag) in tags if tag == "NN"]
-
-#     vectorizer = TfidfVectorizer()
-#     tfidf = vectorizer.fit_transform(nouns)
-
-#     top_nouns = sorted(
-#         vectorizer.vocabulary_, key=lambda x: tfidf[0, vectorizer.vocabulary_[x]], reverse=True
-#     )
-
-#     keywords = []
-
-#     for word in top_nouns:
-#         if len(word) > 3:
-#             keywords.append(word)
-#         if len(keywords) >= 15:
-#             break
-
-#     return keywords
 
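The deleted file contained only commented-out dead code. For reference, the TF-IDF sketch it carried also had a logic quirk: fitting the vectorizer on the noun list treats each noun as its own document, and tfidf[0, ...] then scores terms against the first noun only. A working single-document version of the same idea, assuming scikit-learn is available, would look roughly like:

from sklearn.feature_extraction.text import TfidfVectorizer

def extract_keywords(text: str, top_k: int = 15) -> list[str]:
    """Return up to top_k TF-IDF-ranked terms longer than 3 characters."""
    if not text:
        raise ValueError("Input text is empty.")
    vectorizer = TfidfVectorizer()
    tfidf = vectorizer.fit_transform([text])  # one document, one matrix row
    scores = tfidf.toarray()[0]
    terms = vectorizer.get_feature_names_out()
    ranked = sorted(zip(terms, scores), key=lambda pair: pair[1], reverse=True)
    return [term for term, score in ranked if len(term) > 3][:top_k]
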
@@ -75,6 +75,13 @@ class LLMConfig(BaseSettings):
 
     def model_post_init(self, __context) -> None:
         """Initialize the BAML registry after the model is created."""
+        # Check if BAML is selected as structured output framework but not available
+        if self.structured_output_framework == "baml" and ClientRegistry is None:
+            raise ImportError(
+                "BAML is selected as structured output framework but not available. "
+                "Please install with 'pip install cognee[baml]' to use BAML extraction features."
+            )
+
         if ClientRegistry is not None:
             self.baml_registry = ClientRegistry()
             self.baml_registry.add_llm_client(
 
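The hunk above centralizes the BAML availability check in LLMConfig. The ClientRegistry is None test only works because the module imports BAML defensively; a minimal sketch of that guarded-import pattern (the exact import site in cognee is an assumption, though baml_py is the import name of the baml-py package):

# Guarded optional import: ClientRegistry stays None when the
# cognee[baml] extra is not installed, which model_post_init tests for.
try:
    from baml_py import ClientRegistry
except ImportError:
    ClientRegistry = None
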
@@ -37,11 +37,6 @@ async def extract_summary(content: str, response_model: Type[BaseModel]):
     """
     config = get_llm_config()
 
-    if config.baml_registry is None:
-        raise ImportError(
-            "BAML is not available. Please install with 'pip install cognee[baml]' to use BAML extraction features."
-        )
-
     # Use BAML's SummarizeContent function
     summary_result = await b.SummarizeContent(
         content, baml_options={"client_registry": config.baml_registry}
 
@@ -82,11 +77,6 @@ async def extract_code_summary(content: str):
     try:
         config = get_llm_config()
 
-        if config.baml_registry is None:
-            raise ImportError(
-                "BAML is not available. Please install with 'pip install cognee[baml]' to use BAML extraction features."
-            )
-
         result = await b.SummarizeCode(
             content, baml_options={"client_registry": config.baml_registry}
         )
 
@@ -16,11 +16,6 @@ async def extract_content_graph(
 
     get_logger(level="INFO")
 
-    if config.baml_registry is None:
-        raise ImportError(
-            "BAML is not available. Please install with 'pip install cognee[baml]' to use BAML extraction features."
-        )
-
     # if response_model:
     #     # tb = TypeBuilder()
     #     # country = tb.union \
 
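The three hunks above remove the same per-call-site guard from extract_summary, extract_code_summary, and extract_content_graph. It is redundant now because pydantic invokes model_post_init exactly once, when the LLMConfig instance is constructed, so a missing BAML install fails before any extraction function runs. A toy illustration of that hook's timing, not project code:

from pydantic import BaseModel

class Cfg(BaseModel):
    framework: str = "baml"

    def model_post_init(self, __context) -> None:
        # Runs once, immediately after validation/construction completes.
        if self.framework == "baml":
            print("checked at construction, not at each call site")

Cfg()  # prints right away; later callers can rely on the invariant
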
@@ -17,46 +17,6 @@ from cognee.infrastructure.databases.graph import get_graph_engine
 proxy_url = "https://test.prometh.ai"
 
 
-def get_entities(tagged_tokens):
-    try:
-        import nltk
-
-        nltk.download("maxent_ne_chunker", quiet=True)
-        from nltk.chunk import ne_chunk
-
-        return ne_chunk(tagged_tokens)
-    except ImportError:
-        raise ImportError(
-            "NLTK is required for entity extraction. Install with 'pip install cognee[nlp]' to use this feature."
-        )
-
-
-def extract_pos_tags(sentence):
-    """Extract Part-of-Speech (POS) tags for words in a sentence."""
-    try:
-        import nltk
-
-        # Ensure that the necessary NLTK resources are downloaded
-        nltk.download("words", quiet=True)
-        nltk.download("punkt", quiet=True)
-        nltk.download("averaged_perceptron_tagger", quiet=True)
-
-        from nltk.tag import pos_tag
-        from nltk.tokenize import word_tokenize
-
-        # Tokenize the sentence into words
-        tokens = word_tokenize(sentence)
-
-        # Tag each word with its corresponding POS tag
-        pos_tags = pos_tag(tokens)
-
-        return pos_tags
-    except ImportError:
-        raise ImportError(
-            "NLTK is required for POS tagging. Install with 'pip install cognee[nlp]' to use this feature."
-        )
-
-
 def get_anonymous_id():
     """Creates or reads an anonymous user id"""
     tracking_id = os.getenv("TRACKING_ID", None)
 
@@ -49,7 +49,8 @@ dependencies = [
     "python-magic-bin<0.5 ; platform_system == 'Windows'", # Only needed for Windows
     "fastembed<=0.6.0",
     "networkx>=3.4.2,<4",
-    "matplotlib>=3.8.3,<4"
+    "matplotlib>=3.8.3,<4",
+    "baml-py>=0.201.0"
 
 ]
 
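One detail worth calling out in this hunk: TOML arrays require commas between elements, so appending "baml-py>=0.201.0" forces the trailing comma onto the matplotlib pin that previously closed the list. A quick parse check of the edited file (tomllib is stdlib on Python 3.11+; the relative path is an assumption):

import tomllib  # stdlib since Python 3.11

# Assumes this runs from the repository root, next to pyproject.toml.
with open("pyproject.toml", "rb") as f:
    data = tomllib.load(f)

deps = data["project"]["dependencies"]
assert any(d.startswith("baml-py") for d in deps), "baml-py pin missing"
print(f"parsed {len(deps)} dependencies OK")
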
@@ -66,18 +67,12 @@ distributed = [
 # Database backends
 neo4j = ["neo4j>=5.28.0,<6"]
 neptune = ["langchain_aws>=0.2.22"]
 # PostgreSQL support (binary - no compilation required)
 postgres = [
-    "psycopg2>=2.9.10,<3",
+    "psycopg2-binary>=2.9.10,<3.0.0", # Pre-compiled binary, no PostgreSQL headers needed
     "pgvector>=0.3.5,<0.4",
     "asyncpg>=0.30.0,<1.0.0",
 ]
-# PostgreSQL support (source - requires PostgreSQL development headers)
-postgres-source = [
-    "psycopg2>=2.9.10,<3 ; platform_system != 'Windows'", # Requires libpq-dev, build tools
-    "pgvector>=0.3.5,<0.4",
-    "asyncpg>=0.30.0,<1.0.0",
-]
 notebook = ["notebook>=7.1.0,<8"]
 langchain = [
     "langsmith>=0.2.3,<1.0.0",
 
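A note on the last hunk: psycopg2-binary ships the same importable psycopg2 module as the source distribution, so code that does import psycopg2 is unaffected by dropping the postgres-source extra. A quick check, assuming the postgres extra is installed:

# psycopg2-binary provides the same top-level module name as psycopg2,
# so existing imports keep working after this dependency swap.
import psycopg2

print(psycopg2.__version__)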