# Reconstructed from a whitespace-collapsed patch to cognee/shared/utils.py
# (index 749cfef66..612a9399f). Only the lines that appear in the patch hunks
# are reproduced here; a "# ..." line marks file regions the patch does not show.

# ...

import networkx as nx
import pandas as pd
import matplotlib.pyplot as plt
import tiktoken
import nltk
import base64

import time

# ...

from cognee.shared.exceptions import IngestionError

proxy_url = "https://test.prometh.ai"

# NLTK resource ids already fetched successfully in this process. Lets the
# helpers below skip repeated nltk.download() calls on every invocation.
_nltk_resources_ready = set()


def _ensure_nltk_resources(*resource_ids):
    """Download each named NLTK resource at most once per process.

    A resource is only remembered when nltk.download() reports success, so a
    failed download (e.g. no network) is retried on the next call instead of
    being cached as "done".
    """
    for resource_id in resource_ids:
        if resource_id in _nltk_resources_ready:
            continue
        if nltk.download(resource_id, quiet=True):
            _nltk_resources_ready.add(resource_id)


def get_entities(tagged_tokens):
    """Run NLTK's named-entity chunker over POS-tagged tokens.

    Args:
        tagged_tokens: POS-tagged tokens, e.g. the value returned by
            extract_pos_tags().

    Returns:
        Whatever nltk.chunk.ne_chunk returns for the given tokens.
    """
    # The chunker needs the "words" corpus as well as its own model; fetch both
    # here so this function does not depend on extract_pos_tags() having been
    # called first.
    _ensure_nltk_resources("maxent_ne_chunker", "words")
    from nltk.chunk import ne_chunk

    return ne_chunk(tagged_tokens)


def extract_pos_tags(sentence: str):
    """Extract Part-of-Speech (POS) tags for words in a sentence.

    Args:
        sentence: Text to tokenize and tag.

    Returns:
        The result of nltk.tag.pos_tag applied to the word tokens of
        `sentence`.
    """
    # Ensure that the necessary NLTK resources are downloaded. "words" is kept
    # in this list so callers that relied on this function fetching it still
    # get the same side effect.
    _ensure_nltk_resources("words", "punkt", "averaged_perceptron_tagger")

    from nltk.tag import pos_tag
    from nltk.tokenize import word_tokenize

    # Tokenize the sentence into words, then tag each word with its POS tag
    tokens = word_tokenize(sentence)
    return pos_tag(tokens)


def get_anonymous_id():
    """Creates or reads a anonymous user id"""
    home_dir = str(pathlib.Path(pathlib.Path(__file__).parent.parent.parent.resolve()))
    # ...