chore: remove unused dependencies
This commit is contained in:
parent
14555a25d0
commit
6ef4bbe862
4 changed files with 12 additions and 1190 deletions
|
|
@ -1 +1 @@
|
|||
from .extract_topics import extract_topics_yake, extract_topics_keybert
|
||||
from .extract_topics import extract_topics_keybert
|
||||
|
|
|
|||
|
|
@ -5,21 +5,6 @@ from nltk.corpus import stopwords, wordnet
|
|||
from nltk.tokenize import word_tokenize
|
||||
from nltk.stem import WordNetLemmatizer
|
||||
|
||||
def extract_topics_yake(texts: list[str]):
    """Lazily yield a list of YAKE keywords for each text in *texts*.

    Each input text is passed through ``preprocess_text`` and then handed to
    a single shared ``KeywordExtractor``. For every text, one list is
    yielded containing the first element of each entry returned by
    ``extract_keywords`` (presumably the keyword string of a
    ``(keyword, score)`` pair -- confirm against the YAKE documentation).

    ``yake`` is imported inside the function body, and because this is a
    generator, neither the import nor the extractor construction runs until
    the first item is requested.
    """
    from yake import KeywordExtractor

    # Extractor settings, kept in one place so they are easy to tune.
    extractor_options = {
        "top": 3,
        "n": 2,
        "dedupLim": 0.2,
        "dedupFunc": "levenshtein",  # "seqm" | "levenshtein"
        "windowsSize": 1,
    }
    extractor = KeywordExtractor(**extractor_options)

    for document in texts:
        cleaned_document = preprocess_text(document)
        ranked_entries = extractor.extract_keywords(cleaned_document)
        yield [entry[0] for entry in ranked_entries]
|
||||
|
||||
def extract_topics_keybert(texts: list[str]):
|
||||
from keybert import KeyBERT
|
||||
|
||||
|
|
@ -98,16 +83,3 @@ def preprocess_text(text: str):
|
|||
# text = [word for word in text if not word in stop_words]
|
||||
# return " ".join(text)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual run: load the sample texts stored next to this module and print
    # the YAKE topics extracted for each of them.
    # NOTE(review): the nesting below is reconstructed from statement order;
    # the blank-line print is assumed to follow every topic list.
    import json
    import os

    module_dir = os.path.dirname(os.path.realpath(__file__))
    sample_path = os.path.join(module_dir, "texts.json")

    with open(sample_path, "r", encoding="utf-8") as file:
        texts = json.load(file)

    for topics in extract_topics_yake(texts):
        print(topics)
        print("\n")
|
||||
|
|
|
|||
1164
poetry.lock
generated
1164
poetry.lock
generated
File diff suppressed because it is too large
Load diff
|
|
@ -53,30 +53,22 @@ pypdf = "^4.1.0"
|
|||
neo4j = "5.20.0"
|
||||
jinja2 = "^3.1.3"
|
||||
matplotlib = "^3.8.3"
|
||||
nest-asyncio = "^1.6.0"
|
||||
structlog = "^24.1.0"
|
||||
tiktoken = "0.7.0"
|
||||
posthog = "^3.5.0"
|
||||
lancedb = "0.8.0"
|
||||
importlib-metadata = "7.1.0"
|
||||
litellm = "1.38.10"
|
||||
groq = "0.8.0"
|
||||
tantivy = "^0.22.0"
|
||||
huggingface-hub ="0.20.0"
|
||||
tokenizers ="0.15.2"
|
||||
transformers ="4.39.0"
|
||||
python-multipart = "^0.0.9"
|
||||
langfuse = "^2.32.0"
|
||||
spacy = "^3.7.4"
|
||||
protobuf = "<5.0.0"
|
||||
pydantic-settings = "^2.2.1"
|
||||
anthropic = "^0.26.1"
|
||||
langchain-text-splitters = "^0.2.1"
|
||||
yake = {git = "https://github.com/LIAAD/yake"}
|
||||
keybert = "^0.8.5"
|
||||
pdfplumber = "^0.11.1"
|
||||
sentry-sdk = {extras = ["fastapi"], version = "^2.9.0"}
|
||||
langchain-core = '0.2.19'
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue