chore: remove unused dependencies

This commit is contained in:
Boris Arzentar 2024-07-20 16:59:25 +02:00
parent 14555a25d0
commit 6ef4bbe862
4 changed files with 12 additions and 1190 deletions

View file

@@ -1 +1 @@
from .extract_topics import extract_topics_yake, extract_topics_keybert
from .extract_topics import extract_topics_keybert

View file

@@ -5,21 +5,6 @@ from nltk.corpus import stopwords, wordnet
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
def extract_topics_yake(texts: list[str]):
    """Lazily produce one list of topics per entry of ``texts``.

    Every text is passed through ``preprocess_text`` and then handed to a
    YAKE ``KeywordExtractor``; the first element of each extracted entry is
    kept. This is a generator: results are yielded in input order, one list
    per text.
    """
    # Imported inside the function, so merely importing this module does not
    # require yake to be installed.
    from yake import KeywordExtractor

    extractor_settings = {
        "top": 3,
        "n": 2,
        "dedupLim": 0.2,
        "dedupFunc": "levenshtein",  # options noted by the author: "seqm" | "levenshtein"
        "windowsSize": 1,
    }
    extractor = KeywordExtractor(**extractor_settings)

    for document in texts:
        ranked = extractor.extract_keywords(preprocess_text(document))
        yield [entry[0] for entry in ranked]
def extract_topics_keybert(texts: list[str]):
from keybert import KeyBERT
@@ -98,16 +83,3 @@ def preprocess_text(text: str):
# text = [word for word in text if not word in stop_words]
# return " ".join(text)
# Manual run: when this module is executed as a script, read "texts.json" from
# the directory containing this file and print the topics that
# extract_topics_yake produces for its contents.
# NOTE(review): indentation is not preserved in this view, so the nesting of the
# statements below (in particular whether the final print("\n") runs once per
# item or once overall) cannot be confirmed from here - verify against the file.
if __name__ == "__main__":
import os
# Directory of this file, with symlinks resolved by realpath.
file_dir = os.path.dirname(os.path.realpath(__file__))
with open(os.path.join(file_dir, "texts.json"), "r", encoding = "utf-8") as file:
import json
# Presumably a JSON array of strings, given extract_topics_yake's signature - TODO confirm.
texts = json.load(file)
# extract_topics_yake is a generator; iterating it drives the extraction.
for topics in extract_topics_yake(texts):
print(topics)
print("\n")

1164
poetry.lock generated

File diff suppressed because it is too large. Load diff

View file

@@ -53,30 +53,22 @@ pypdf = "^4.1.0"
neo4j = "5.20.0"
jinja2 = "^3.1.3"
matplotlib = "^3.8.3"
nest-asyncio = "^1.6.0"
structlog = "^24.1.0"
tiktoken = "0.7.0"
posthog = "^3.5.0"
lancedb = "0.8.0"
importlib-metadata = "7.1.0"
litellm = "1.38.10"
groq = "0.8.0"
tantivy = "^0.22.0"
huggingface-hub ="0.20.0"
tokenizers ="0.15.2"
transformers ="4.39.0"
python-multipart = "^0.0.9"
langfuse = "^2.32.0"
spacy = "^3.7.4"
protobuf = "<5.0.0"
pydantic-settings = "^2.2.1"
anthropic = "^0.26.1"
langchain-text-splitters = "^0.2.1"
yake = {git = "https://github.com/LIAAD/yake"}
keybert = "^0.8.5"
pdfplumber = "^0.11.1"
sentry-sdk = {extras = ["fastapi"], version = "^2.9.0"}
langchain-core = '0.2.19'