chore: remove unused dependencies
This commit is contained in:
parent
14555a25d0
commit
6ef4bbe862
4 changed files with 12 additions and 1190 deletions
|
|
@ -1 +1 @@
|
||||||
from .extract_topics import extract_topics_yake, extract_topics_keybert
|
from .extract_topics import extract_topics_keybert
|
||||||
|
|
|
||||||
|
|
@ -5,21 +5,6 @@ from nltk.corpus import stopwords, wordnet
|
||||||
from nltk.tokenize import word_tokenize
|
from nltk.tokenize import word_tokenize
|
||||||
from nltk.stem import WordNetLemmatizer
|
from nltk.stem import WordNetLemmatizer
|
||||||
|
|
||||||
def extract_topics_yake(texts: list[str]):
|
|
||||||
from yake import KeywordExtractor
|
|
||||||
|
|
||||||
keyword_extractor = KeywordExtractor(
|
|
||||||
top = 3,
|
|
||||||
n = 2,
|
|
||||||
dedupLim = 0.2,
|
|
||||||
dedupFunc = "levenshtein", # "seqm" | "levenshtein"
|
|
||||||
windowsSize = 1,
|
|
||||||
)
|
|
||||||
|
|
||||||
for text in texts:
|
|
||||||
topics = keyword_extractor.extract_keywords(preprocess_text(text))
|
|
||||||
yield [topic[0] for topic in topics]
|
|
||||||
|
|
||||||
def extract_topics_keybert(texts: list[str]):
|
def extract_topics_keybert(texts: list[str]):
|
||||||
from keybert import KeyBERT
|
from keybert import KeyBERT
|
||||||
|
|
||||||
|
|
@ -98,16 +83,3 @@ def preprocess_text(text: str):
|
||||||
# text = [word for word in text if not word in stop_words]
|
# text = [word for word in text if not word in stop_words]
|
||||||
# return " ".join(text)
|
# return " ".join(text)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
import os
|
|
||||||
|
|
||||||
file_dir = os.path.dirname(os.path.realpath(__file__))
|
|
||||||
|
|
||||||
with open(os.path.join(file_dir, "texts.json"), "r", encoding = "utf-8") as file:
|
|
||||||
import json
|
|
||||||
texts = json.load(file)
|
|
||||||
|
|
||||||
for topics in extract_topics_yake(texts):
|
|
||||||
print(topics)
|
|
||||||
print("\n")
|
|
||||||
|
|
|
||||||
1164
poetry.lock
generated
1164
poetry.lock
generated
File diff suppressed because it is too large
Load diff
|
|
@ -53,30 +53,22 @@ pypdf = "^4.1.0"
|
||||||
neo4j = "5.20.0"
|
neo4j = "5.20.0"
|
||||||
jinja2 = "^3.1.3"
|
jinja2 = "^3.1.3"
|
||||||
matplotlib = "^3.8.3"
|
matplotlib = "^3.8.3"
|
||||||
nest-asyncio = "^1.6.0"
|
|
||||||
structlog = "^24.1.0"
|
structlog = "^24.1.0"
|
||||||
tiktoken = "0.7.0"
|
tiktoken = "0.7.0"
|
||||||
posthog = "^3.5.0"
|
posthog = "^3.5.0"
|
||||||
lancedb = "0.8.0"
|
lancedb = "0.8.0"
|
||||||
importlib-metadata = "7.1.0"
|
|
||||||
litellm = "1.38.10"
|
litellm = "1.38.10"
|
||||||
groq = "0.8.0"
|
groq = "0.8.0"
|
||||||
tantivy = "^0.22.0"
|
tantivy = "^0.22.0"
|
||||||
huggingface-hub ="0.20.0"
|
|
||||||
tokenizers ="0.15.2"
|
tokenizers ="0.15.2"
|
||||||
transformers ="4.39.0"
|
transformers ="4.39.0"
|
||||||
python-multipart = "^0.0.9"
|
python-multipart = "^0.0.9"
|
||||||
langfuse = "^2.32.0"
|
langfuse = "^2.32.0"
|
||||||
spacy = "^3.7.4"
|
|
||||||
protobuf = "<5.0.0"
|
protobuf = "<5.0.0"
|
||||||
pydantic-settings = "^2.2.1"
|
pydantic-settings = "^2.2.1"
|
||||||
anthropic = "^0.26.1"
|
anthropic = "^0.26.1"
|
||||||
langchain-text-splitters = "^0.2.1"
|
|
||||||
yake = {git = "https://github.com/LIAAD/yake"}
|
|
||||||
keybert = "^0.8.5"
|
|
||||||
pdfplumber = "^0.11.1"
|
pdfplumber = "^0.11.1"
|
||||||
sentry-sdk = {extras = ["fastapi"], version = "^2.9.0"}
|
sentry-sdk = {extras = ["fastapi"], version = "^2.9.0"}
|
||||||
langchain-core = '0.2.19'
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue