From ad07bae9a781c909c67658261d579de5834ea3b1 Mon Sep 17 00:00:00 2001
From: vasilije
Date: Tue, 14 Jan 2025 14:10:23 +0100
Subject: [PATCH] Add data visualization for Anthropic

---
 cognee/shared/utils.py | 36 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 35 insertions(+), 1 deletion(-)

diff --git a/cognee/shared/utils.py b/cognee/shared/utils.py
index 749cfef66..612a9399f 100644
--- a/cognee/shared/utils.py
+++ b/cognee/shared/utils.py
@@ -11,7 +11,7 @@ import networkx as nx
 import pandas as pd
 import matplotlib.pyplot as plt
 import tiktoken
-
+import nltk
 import base64
 
 import time
@@ -30,6 +30,40 @@ from cognee.shared.exceptions import IngestionError
 
 proxy_url = "https://test.prometh.ai"
 
+
+def get_entities(tagged_tokens):
+    """Run NLTK's named-entity chunker over POS-tagged tokens."""
+
+    # ne_chunk needs the "words" corpus as well as the chunker model; fetch
+    # both here so this works even if extract_pos_tags() was never called.
+    nltk.download("maxent_ne_chunker", quiet=True)
+    nltk.download("words", quiet=True)
+
+    from nltk.chunk import ne_chunk
+
+    return ne_chunk(tagged_tokens)
+
+
+def extract_pos_tags(sentence):
+    """Extract Part-of-Speech (POS) tags for words in a sentence."""
+
+    # Ensure that the necessary NLTK resources are downloaded
+    nltk.download("words", quiet=True)
+    nltk.download("punkt", quiet=True)
+    nltk.download("averaged_perceptron_tagger", quiet=True)
+
+    from nltk.tag import pos_tag
+    from nltk.tokenize import word_tokenize
+
+    # Tokenize the sentence into words
+    tokens = word_tokenize(sentence)
+
+    # Tag each word with its corresponding POS tag
+    pos_tags = pos_tag(tokens)
+
+    return pos_tags
+
+
 def get_anonymous_id():
     """Creates or reads a anonymous user id"""
     home_dir = str(pathlib.Path(pathlib.Path(__file__).parent.parent.parent.resolve()))