From fc7851156a8af0f9cdba3bdecebc5f433e5dfdad Mon Sep 17 00:00:00 2001 From: Boris Arzentar Date: Fri, 29 Mar 2024 14:36:58 +0100 Subject: [PATCH] chore: remove unused libs --- cognee/shared/language_processing.py | 93 ---------------------------- poetry.lock | 74 +--------------------- pyproject.toml | 2 - 3 files changed, 1 insertion(+), 168 deletions(-) delete mode 100644 cognee/shared/language_processing.py diff --git a/cognee/shared/language_processing.py b/cognee/shared/language_processing.py deleted file mode 100644 index 203b80050..000000000 --- a/cognee/shared/language_processing.py +++ /dev/null @@ -1,93 +0,0 @@ -""" This module provides language processing functions for language detection and translation. """ -import logging -import boto3 -from botocore.exceptions import BotoCoreError, ClientError -from langdetect import detect, LangDetectException -import iso639 - - -# Basic configuration of the logging system -logging.basicConfig( - level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" -) - - -def detect_language(text): - """ - Detect the language of the given text and return its ISO 639-1 language code. - If the detected language is Croatian ('hr'), it maps to Serbian ('sr'). - The text is trimmed to the first 100 characters for efficient processing. - - Parameters: - text (str): The text for language detection. - - Returns: - str: The ISO 639-1 language code of the detected language, or 'None' in case of an error. - """ - - # Trim the text to the first 100 characters - trimmed_text = text[:100] - - try: - # Detect the language using langdetect - detected_lang_iso639_1 = detect(trimmed_text) - logging.info(f"Detected ISO 639-1 code: %s {detected_lang_iso639_1}") - - # Special case: map 'hr' (Croatian) to 'sr' (Serbian ISO 639-2) - if detected_lang_iso639_1 == "hr": - return "sr" - return detected_lang_iso639_1 - - except LangDetectException as e: - logging.error(f"Language detection error: %s {e}") - except Exception as e: - logging.error(f"Unexpected error: %s {e}") - - return -1 - - -def translate_text( - text, - source_language: str = "sr", - target_language: str = "en", - region_name="eu-west-1", -): - """ - Translate text from source language to target language using AWS Translate. - - - Parameters: - text (str): The text to be translated. - source_language (str): The source language code (e.g., 'sr' for Serbian). - ISO 639-2 Code https://www.loc.gov/standards/iso639-2/php/code_list.php - target_language (str): The target language code (e.g., 'en' for English). - ISO 639-2 Code https://www.loc.gov/standards/iso639-2/php/code_list.php - region_name (str): AWS region name. - - Returns: - str: Translated text or an error message. - """ - if not text: - return "No text provided for translation." - - if not source_language or not target_language: - return "Both source and target language codes are required." - - try: - translate = boto3.client( - service_name="translate", region_name=region_name, use_ssl=True - ) - result = translate.translate_text( - Text=text, - SourceLanguageCode=source_language, - TargetLanguageCode=target_language, - ) - return result.get("TranslatedText", "No translation found.") - - except BotoCoreError as e: - logging.info(f"BotoCoreError occurred: %s {e}") - return "Error with AWS Translate service configuration or request." - - except ClientError as e: - logging.info(f"ClientError occurred: %s {e}") - return "Error with AWS client or network issue." diff --git a/poetry.lock b/poetry.lock index e0eaed773..6265f94c1 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2525,20 +2525,6 @@ test-functional = ["jupytext[test]"] test-integration = ["ipykernel", "jupyter-server (!=2.11)", "jupytext[test-functional]", "nbconvert"] test-ui = ["calysto-bash"] -[[package]] -name = "langdetect" -version = "1.0.9" -description = "Language detection library ported from Google's language-detection." -optional = false -python-versions = "*" -files = [ - {file = "langdetect-1.0.9-py2-none-any.whl", hash = "sha256:7cbc0746252f19e76f77c0b1690aadf01963be835ef0cd4b56dddf2a8f1dfc2a"}, - {file = "langdetect-1.0.9.tar.gz", hash = "sha256:cbc1fef89f8d062739774bd51eda3da3274006b3661d199c2655f6b3f6d605a0"}, -] - -[package.dependencies] -six = "*" - [[package]] name = "loguru" version = "0.7.2" @@ -4247,64 +4233,6 @@ pyyaml = "*" [package.extras] extra = ["pygments (>=2.12)"] -[[package]] -name = "pymupdf" -version = "1.24.0" -description = "A high performance Python library for data extraction, analysis, conversion & manipulation of PDF (and other) documents." -optional = false -python-versions = ">=3.8" -files = [ - {file = "PyMuPDF-1.24.0-cp310-none-macosx_10_9_x86_64.whl", hash = "sha256:37160eb301e017ec67bb63b1c6f52eae2c90bd1159f6a6b2ec469c3e69d55f74"}, - {file = "PyMuPDF-1.24.0-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:af2d8ba47851f2a5a2f7592453792a03cbcd705e40512e9aeb199edd7bcce886"}, - {file = "PyMuPDF-1.24.0-cp310-none-manylinux2014_aarch64.whl", hash = "sha256:f318efcfda3ca625b2b2318019d8195b2e239cf1e66eaf5a94cd1e6bd11999d2"}, - {file = "PyMuPDF-1.24.0-cp310-none-manylinux2014_x86_64.whl", hash = "sha256:986b234751e734da1b4f983fd270fa595258781abc25e26d409d96439136c41c"}, - {file = "PyMuPDF-1.24.0-cp310-none-win32.whl", hash = "sha256:490d10c85defec873bf33a54eea1e8cc637927c7efeaff3570b812d7c65256f7"}, - {file = "PyMuPDF-1.24.0-cp310-none-win_amd64.whl", hash = "sha256:2d46cd6535f25ffeb6261d389b932fa6359193a12de3633e200504898d48c27d"}, - {file = "PyMuPDF-1.24.0-cp311-none-macosx_10_9_x86_64.whl", hash = "sha256:9354c2654512390d261bad37a90168de0cb954be4e9b3d55073a67e8ca07f7f8"}, - {file = "PyMuPDF-1.24.0-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:bfc953361277cafa38e5bb93edd2b7c6c0c4284f137cea5847efe730759fe0d2"}, - {file = "PyMuPDF-1.24.0-cp311-none-manylinux2014_aarch64.whl", hash = "sha256:13625c9da4021e649da11acb60e0a8aa300fb6c4bdb450754f975d7f92043999"}, - {file = "PyMuPDF-1.24.0-cp311-none-manylinux2014_x86_64.whl", hash = "sha256:8db27eca7f6aa2c5aa84278cc9961a0183e8aca6d7210a5648658816ea9601bf"}, - {file = "PyMuPDF-1.24.0-cp311-none-win32.whl", hash = "sha256:fc4b7a212b9f3216bb32c1146340efe5282c1519f7250e52ccd9dedcfd04df5d"}, - {file = "PyMuPDF-1.24.0-cp311-none-win_amd64.whl", hash = "sha256:4e92d2895eb55b5475572bda167bb6d3c5b7757ba0b6beee0456ca0d3db852b2"}, - {file = "PyMuPDF-1.24.0-cp312-none-macosx_10_9_x86_64.whl", hash = "sha256:963759f1a2722d25d08e79e00db696e4f5342675bed3b2f2129f03a8d4c41b77"}, - {file = "PyMuPDF-1.24.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:96bcecd0a33b2de6954c4a3c677719cd1d1f36c1fe7dc4e229e06177aef8bdb7"}, - {file = "PyMuPDF-1.24.0-cp312-none-manylinux2014_aarch64.whl", hash = "sha256:b9fb4df0d584b1df3789f521e3950a930884fe0fdd28d4c4ef1c571f3fb9b56e"}, - {file = "PyMuPDF-1.24.0-cp312-none-manylinux2014_x86_64.whl", hash = "sha256:65fc88a23804b83b9390016d377d9350dece167e349140de93769618858ccf8d"}, - {file = "PyMuPDF-1.24.0-cp312-none-win32.whl", hash = "sha256:4395b420477620be4fc90567deb20f17eda5e9757e2ca95f7bc3854d2a6713cc"}, - {file = "PyMuPDF-1.24.0-cp312-none-win_amd64.whl", hash = "sha256:ee1188a8d9bf9dbf21aab8229c99472dd47af315a71753452210f40cff744a7b"}, - {file = "PyMuPDF-1.24.0-cp38-none-macosx_10_9_x86_64.whl", hash = "sha256:82ff0a4ed3a27de95726db1f10744c2865212eed2a28e3fd19a081b9c247028d"}, - {file = "PyMuPDF-1.24.0-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:9e9945d1af3ec6deff4c5d61edc63b9c68d49c2212df1104614e2ab173b1d158"}, - {file = "PyMuPDF-1.24.0-cp38-none-manylinux2014_aarch64.whl", hash = "sha256:f120a23a0690be2e6d3ec195c308582930c75fbf3fb6cb6785252a01454fb0ef"}, - {file = "PyMuPDF-1.24.0-cp38-none-manylinux2014_x86_64.whl", hash = "sha256:08bb534a046d7492ab7cf726ef9aa01a14791e53922ffc2a341fa617709434f2"}, - {file = "PyMuPDF-1.24.0-cp38-none-win32.whl", hash = "sha256:f428210b2fc7e0094dbcd62acc15554cb3ee9778a3429bf2d04850cfbab227fb"}, - {file = "PyMuPDF-1.24.0-cp38-none-win_amd64.whl", hash = "sha256:6731cc7ef76d972220bd1bb50d5b67720de2038312be23806045bcc5f9675951"}, - {file = "PyMuPDF-1.24.0-cp39-none-macosx_10_9_x86_64.whl", hash = "sha256:de1aa7825f3333dfbff26e88f9cd37491a625b783b8b4780a14e5f70ab6d9853"}, - {file = "PyMuPDF-1.24.0-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:160a3310f33fda1c0cfaed82d4e22a2aca960ebf5c6919982032727973e42830"}, - {file = "PyMuPDF-1.24.0-cp39-none-manylinux2014_aarch64.whl", hash = "sha256:ce6f1f0b3ca8023bdbbc90fd2428b05db5c7c4b581d785072200082924f6c82f"}, - {file = "PyMuPDF-1.24.0-cp39-none-manylinux2014_x86_64.whl", hash = "sha256:750908f95771fa0fcdbc690f6aae7e0031ff002c5ea343f12930e42da73e5c8b"}, - {file = "PyMuPDF-1.24.0-cp39-none-win32.whl", hash = "sha256:d193319e3850f4025dc1e3c8a6a0b03683668353aacf660d434668be51e3e464"}, - {file = "PyMuPDF-1.24.0-cp39-none-win_amd64.whl", hash = "sha256:e72b7ab4b2dfffe38ceed1e577ffaaa2e34117d87fc716b0238a6f2a12670fe4"}, - {file = "PyMuPDF-1.24.0.tar.gz", hash = "sha256:b6811b09af1ddb93229066f7acf183f6aeeeec4bf9c2290ff81fbeebbc5a4f79"}, -] - -[package.dependencies] -PyMuPDFb = "1.24.0" - -[[package]] -name = "pymupdfb" -version = "1.24.0" -description = "MuPDF shared libraries for PyMuPDF." -optional = false -python-versions = ">=3.8" -files = [ - {file = "PyMuPDFb-1.24.0-py3-none-macosx_10_9_x86_64.whl", hash = "sha256:5af4e14171efd5e85b82ce2ae94caaebae9f4314103fc9af62be99537e21562e"}, - {file = "PyMuPDFb-1.24.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:113e424b534a73a00dfaf2407beab3e9c35bfe406f77cfa66a43cf5f87bafef6"}, - {file = "PyMuPDFb-1.24.0-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:347fff11c61e82538bdf6293cb4cfb41aa7b6ae14a4785efaaa81da949126424"}, - {file = "PyMuPDFb-1.24.0-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:871e100637fd64c76356656ca4122f4d355906aa25173997959ccaf39413c8d4"}, - {file = "PyMuPDFb-1.24.0-py3-none-win32.whl", hash = "sha256:051e043ada55ecf03cae28b9990ec53b975a69995a0f177caedc9b3bf85d2d22"}, - {file = "PyMuPDFb-1.24.0-py3-none-win_amd64.whl", hash = "sha256:3e368ce2a8935881965343a7b87565b532a1787a3dc8f5580980dfb8b91d0c39"}, -] - [[package]] name = "pypdf" version = "4.1.0" @@ -6194,4 +6122,4 @@ weaviate = ["weaviate-client"] [metadata] lock-version = "2.0" python-versions = "~3.11" -content-hash = "df096228fdc234bf2b2d53231366997744a34c03ea294e86071f8fa4679b262e" +content-hash = "d556cc86b6310e0c6020d900d4f958e067fd235c40a1df9e3139ca80625c923c" diff --git a/pyproject.toml b/pyproject.toml index 01af85bdc..74c296f59 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,12 +29,10 @@ asyncpg = "^0.28.0" instructor = "^0.6.8" networkx = "^3.2.1" graphviz = "^0.20.1" -langdetect = "^1.0.9" debugpy = "^1.8.0" pyarrow = "^15.0.0" pylint = "^3.0.3" aiosqlite = "^0.20.0" -pymupdf = "^1.23.25" pandas = "^2.2.0" greenlet = "^3.0.3" ruff = "^0.2.2"