chore: remove unused libs
This commit is contained in:
parent
cf39bb1c85
commit
fc7851156a
3 changed files with 1 additions and 168 deletions
|
|
@ -1,93 +0,0 @@
|
|||
""" This module provides language processing functions for language detection and translation. """
|
||||
import logging
|
||||
import boto3
|
||||
from botocore.exceptions import BotoCoreError, ClientError
|
||||
from langdetect import detect, LangDetectException
|
||||
import iso639
|
||||
|
||||
|
||||
# Set up the root logger when this module is imported: INFO threshold,
# with each record rendered as "<timestamp> - <level> - <message>".
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
|
||||
|
||||
|
||||
def detect_language(text):
    """
    Detect the language of the given text and return its ISO 639-1 language code.

    If the detected language is Croatian ('hr'), it is reported as Serbian ('sr').
    Only the first 100 characters of the text are passed to the detector.

    Parameters:
    text (str): The text for language detection.

    Returns:
    str | int: The ISO 639-1 language code of the detected language, or the
        integer -1 if detection fails. Failures are logged, not raised.
    """
    # Trim the text to the first 100 characters before detection.
    trimmed_text = text[:100]

    try:
        # Detect the language using langdetect
        detected_lang_iso639_1 = detect(trimmed_text)
        # Lazy %-style arguments: the previous f-string also contained a
        # literal "%s", which leaked into every log record.
        logging.info("Detected ISO 639-1 code: %s", detected_lang_iso639_1)

        # Special case: map 'hr' (Croatian) to 'sr' (Serbian, ISO 639-1)
        if detected_lang_iso639_1 == "hr":
            return "sr"
        return detected_lang_iso639_1

    except LangDetectException as e:
        logging.error("Language detection error: %s", e)
    except Exception as e:
        # Deliberate best-effort: any other failure is logged and reported
        # through the -1 sentinel rather than propagated to the caller.
        logging.error("Unexpected error: %s", e)

    # Sentinel value callers check for; reached only after a logged failure.
    return -1
|
||||
|
||||
|
||||
def translate_text(
    text,
    source_language: str = "sr",
    target_language: str = "en",
    region_name="eu-west-1",
):
    """
    Translate text from source language to target language using AWS Translate.

    Errors are never raised to the caller: validation problems and AWS
    failures are reported through the returned string instead.

    Parameters:
    text (str): The text to be translated.
    source_language (str): The source language code (e.g., 'sr' for Serbian).
        Must be a code accepted by AWS Translate; the defaults used here are
        two-letter codes.
    target_language (str): The target language code (e.g., 'en' for English).
        Must be a code accepted by AWS Translate.
    region_name (str): AWS region name.

    Returns:
    str: Translated text, "No translation found." if the response carries no
        "TranslatedText" entry, or an error message.
    """
    # Guard clauses: reject missing input before any AWS client is created.
    if not text:
        return "No text provided for translation."

    if not source_language or not target_language:
        return "Both source and target language codes are required."

    try:
        translate = boto3.client(
            service_name="translate", region_name=region_name, use_ssl=True
        )
        result = translate.translate_text(
            Text=text,
            SourceLanguageCode=source_language,
            TargetLanguageCode=target_language,
        )
        return result.get("TranslatedText", "No translation found.")

    except BotoCoreError as e:
        # Logged at ERROR level with lazy %-style arguments; the previous
        # f-string also contained a literal "%s" that leaked into the record.
        logging.error("BotoCoreError occurred: %s", e)
        return "Error with AWS Translate service configuration or request."

    except ClientError as e:
        logging.error("ClientError occurred: %s", e)
        return "Error with AWS client or network issue."
|
||||
74
poetry.lock
generated
74
poetry.lock
generated
|
|
@ -2525,20 +2525,6 @@ test-functional = ["jupytext[test]"]
|
|||
test-integration = ["ipykernel", "jupyter-server (!=2.11)", "jupytext[test-functional]", "nbconvert"]
|
||||
test-ui = ["calysto-bash"]
|
||||
|
||||
[[package]]
|
||||
name = "langdetect"
|
||||
version = "1.0.9"
|
||||
description = "Language detection library ported from Google's language-detection."
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
files = [
|
||||
{file = "langdetect-1.0.9-py2-none-any.whl", hash = "sha256:7cbc0746252f19e76f77c0b1690aadf01963be835ef0cd4b56dddf2a8f1dfc2a"},
|
||||
{file = "langdetect-1.0.9.tar.gz", hash = "sha256:cbc1fef89f8d062739774bd51eda3da3274006b3661d199c2655f6b3f6d605a0"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
six = "*"
|
||||
|
||||
[[package]]
|
||||
name = "loguru"
|
||||
version = "0.7.2"
|
||||
|
|
@ -4247,64 +4233,6 @@ pyyaml = "*"
|
|||
[package.extras]
|
||||
extra = ["pygments (>=2.12)"]
|
||||
|
||||
[[package]]
|
||||
name = "pymupdf"
|
||||
version = "1.24.0"
|
||||
description = "A high performance Python library for data extraction, analysis, conversion & manipulation of PDF (and other) documents."
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "PyMuPDF-1.24.0-cp310-none-macosx_10_9_x86_64.whl", hash = "sha256:37160eb301e017ec67bb63b1c6f52eae2c90bd1159f6a6b2ec469c3e69d55f74"},
|
||||
{file = "PyMuPDF-1.24.0-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:af2d8ba47851f2a5a2f7592453792a03cbcd705e40512e9aeb199edd7bcce886"},
|
||||
{file = "PyMuPDF-1.24.0-cp310-none-manylinux2014_aarch64.whl", hash = "sha256:f318efcfda3ca625b2b2318019d8195b2e239cf1e66eaf5a94cd1e6bd11999d2"},
|
||||
{file = "PyMuPDF-1.24.0-cp310-none-manylinux2014_x86_64.whl", hash = "sha256:986b234751e734da1b4f983fd270fa595258781abc25e26d409d96439136c41c"},
|
||||
{file = "PyMuPDF-1.24.0-cp310-none-win32.whl", hash = "sha256:490d10c85defec873bf33a54eea1e8cc637927c7efeaff3570b812d7c65256f7"},
|
||||
{file = "PyMuPDF-1.24.0-cp310-none-win_amd64.whl", hash = "sha256:2d46cd6535f25ffeb6261d389b932fa6359193a12de3633e200504898d48c27d"},
|
||||
{file = "PyMuPDF-1.24.0-cp311-none-macosx_10_9_x86_64.whl", hash = "sha256:9354c2654512390d261bad37a90168de0cb954be4e9b3d55073a67e8ca07f7f8"},
|
||||
{file = "PyMuPDF-1.24.0-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:bfc953361277cafa38e5bb93edd2b7c6c0c4284f137cea5847efe730759fe0d2"},
|
||||
{file = "PyMuPDF-1.24.0-cp311-none-manylinux2014_aarch64.whl", hash = "sha256:13625c9da4021e649da11acb60e0a8aa300fb6c4bdb450754f975d7f92043999"},
|
||||
{file = "PyMuPDF-1.24.0-cp311-none-manylinux2014_x86_64.whl", hash = "sha256:8db27eca7f6aa2c5aa84278cc9961a0183e8aca6d7210a5648658816ea9601bf"},
|
||||
{file = "PyMuPDF-1.24.0-cp311-none-win32.whl", hash = "sha256:fc4b7a212b9f3216bb32c1146340efe5282c1519f7250e52ccd9dedcfd04df5d"},
|
||||
{file = "PyMuPDF-1.24.0-cp311-none-win_amd64.whl", hash = "sha256:4e92d2895eb55b5475572bda167bb6d3c5b7757ba0b6beee0456ca0d3db852b2"},
|
||||
{file = "PyMuPDF-1.24.0-cp312-none-macosx_10_9_x86_64.whl", hash = "sha256:963759f1a2722d25d08e79e00db696e4f5342675bed3b2f2129f03a8d4c41b77"},
|
||||
{file = "PyMuPDF-1.24.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:96bcecd0a33b2de6954c4a3c677719cd1d1f36c1fe7dc4e229e06177aef8bdb7"},
|
||||
{file = "PyMuPDF-1.24.0-cp312-none-manylinux2014_aarch64.whl", hash = "sha256:b9fb4df0d584b1df3789f521e3950a930884fe0fdd28d4c4ef1c571f3fb9b56e"},
|
||||
{file = "PyMuPDF-1.24.0-cp312-none-manylinux2014_x86_64.whl", hash = "sha256:65fc88a23804b83b9390016d377d9350dece167e349140de93769618858ccf8d"},
|
||||
{file = "PyMuPDF-1.24.0-cp312-none-win32.whl", hash = "sha256:4395b420477620be4fc90567deb20f17eda5e9757e2ca95f7bc3854d2a6713cc"},
|
||||
{file = "PyMuPDF-1.24.0-cp312-none-win_amd64.whl", hash = "sha256:ee1188a8d9bf9dbf21aab8229c99472dd47af315a71753452210f40cff744a7b"},
|
||||
{file = "PyMuPDF-1.24.0-cp38-none-macosx_10_9_x86_64.whl", hash = "sha256:82ff0a4ed3a27de95726db1f10744c2865212eed2a28e3fd19a081b9c247028d"},
|
||||
{file = "PyMuPDF-1.24.0-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:9e9945d1af3ec6deff4c5d61edc63b9c68d49c2212df1104614e2ab173b1d158"},
|
||||
{file = "PyMuPDF-1.24.0-cp38-none-manylinux2014_aarch64.whl", hash = "sha256:f120a23a0690be2e6d3ec195c308582930c75fbf3fb6cb6785252a01454fb0ef"},
|
||||
{file = "PyMuPDF-1.24.0-cp38-none-manylinux2014_x86_64.whl", hash = "sha256:08bb534a046d7492ab7cf726ef9aa01a14791e53922ffc2a341fa617709434f2"},
|
||||
{file = "PyMuPDF-1.24.0-cp38-none-win32.whl", hash = "sha256:f428210b2fc7e0094dbcd62acc15554cb3ee9778a3429bf2d04850cfbab227fb"},
|
||||
{file = "PyMuPDF-1.24.0-cp38-none-win_amd64.whl", hash = "sha256:6731cc7ef76d972220bd1bb50d5b67720de2038312be23806045bcc5f9675951"},
|
||||
{file = "PyMuPDF-1.24.0-cp39-none-macosx_10_9_x86_64.whl", hash = "sha256:de1aa7825f3333dfbff26e88f9cd37491a625b783b8b4780a14e5f70ab6d9853"},
|
||||
{file = "PyMuPDF-1.24.0-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:160a3310f33fda1c0cfaed82d4e22a2aca960ebf5c6919982032727973e42830"},
|
||||
{file = "PyMuPDF-1.24.0-cp39-none-manylinux2014_aarch64.whl", hash = "sha256:ce6f1f0b3ca8023bdbbc90fd2428b05db5c7c4b581d785072200082924f6c82f"},
|
||||
{file = "PyMuPDF-1.24.0-cp39-none-manylinux2014_x86_64.whl", hash = "sha256:750908f95771fa0fcdbc690f6aae7e0031ff002c5ea343f12930e42da73e5c8b"},
|
||||
{file = "PyMuPDF-1.24.0-cp39-none-win32.whl", hash = "sha256:d193319e3850f4025dc1e3c8a6a0b03683668353aacf660d434668be51e3e464"},
|
||||
{file = "PyMuPDF-1.24.0-cp39-none-win_amd64.whl", hash = "sha256:e72b7ab4b2dfffe38ceed1e577ffaaa2e34117d87fc716b0238a6f2a12670fe4"},
|
||||
{file = "PyMuPDF-1.24.0.tar.gz", hash = "sha256:b6811b09af1ddb93229066f7acf183f6aeeeec4bf9c2290ff81fbeebbc5a4f79"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
PyMuPDFb = "1.24.0"
|
||||
|
||||
[[package]]
|
||||
name = "pymupdfb"
|
||||
version = "1.24.0"
|
||||
description = "MuPDF shared libraries for PyMuPDF."
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "PyMuPDFb-1.24.0-py3-none-macosx_10_9_x86_64.whl", hash = "sha256:5af4e14171efd5e85b82ce2ae94caaebae9f4314103fc9af62be99537e21562e"},
|
||||
{file = "PyMuPDFb-1.24.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:113e424b534a73a00dfaf2407beab3e9c35bfe406f77cfa66a43cf5f87bafef6"},
|
||||
{file = "PyMuPDFb-1.24.0-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:347fff11c61e82538bdf6293cb4cfb41aa7b6ae14a4785efaaa81da949126424"},
|
||||
{file = "PyMuPDFb-1.24.0-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:871e100637fd64c76356656ca4122f4d355906aa25173997959ccaf39413c8d4"},
|
||||
{file = "PyMuPDFb-1.24.0-py3-none-win32.whl", hash = "sha256:051e043ada55ecf03cae28b9990ec53b975a69995a0f177caedc9b3bf85d2d22"},
|
||||
{file = "PyMuPDFb-1.24.0-py3-none-win_amd64.whl", hash = "sha256:3e368ce2a8935881965343a7b87565b532a1787a3dc8f5580980dfb8b91d0c39"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pypdf"
|
||||
version = "4.1.0"
|
||||
|
|
@ -6194,4 +6122,4 @@ weaviate = ["weaviate-client"]
|
|||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = "~3.11"
|
||||
content-hash = "df096228fdc234bf2b2d53231366997744a34c03ea294e86071f8fa4679b262e"
|
||||
content-hash = "d556cc86b6310e0c6020d900d4f958e067fd235c40a1df9e3139ca80625c923c"
|
||||
|
|
|
|||
|
|
@ -29,12 +29,10 @@ asyncpg = "^0.28.0"
|
|||
instructor = "^0.6.8"
|
||||
networkx = "^3.2.1"
|
||||
graphviz = "^0.20.1"
|
||||
langdetect = "^1.0.9"
|
||||
debugpy = "^1.8.0"
|
||||
pyarrow = "^15.0.0"
|
||||
pylint = "^3.0.3"
|
||||
aiosqlite = "^0.20.0"
|
||||
pymupdf = "^1.23.25"
|
||||
pandas = "^2.2.0"
|
||||
greenlet = "^3.0.3"
|
||||
ruff = "^0.2.2"
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue