chore: remove unused libs
This commit is contained in:
parent
cf39bb1c85
commit
fc7851156a
3 changed files with 1 additions and 168 deletions
|
|
@ -1,93 +0,0 @@
|
||||||
""" This module provides language processing functions for language detection and translation. """
|
|
||||||
import logging
|
|
||||||
import boto3
|
|
||||||
from botocore.exceptions import BotoCoreError, ClientError
|
|
||||||
from langdetect import detect, LangDetectException
|
|
||||||
import iso639
|
|
||||||
|
|
||||||
|
|
||||||
# Set up root-logger defaults at import time: INFO threshold and a
# "timestamp - level - message" record layout.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
|
|
||||||
|
|
||||||
|
|
||||||
def detect_language(text):
    """
    Detect the language of the given text and return its ISO 639-1 language code.

    Only the first 100 characters of the text are passed to langdetect.
    If the detected language is Croatian ('hr'), Serbian ('sr') is returned instead.

    Parameters:
    text (str): The text for language detection.

    Returns:
    str | int: The ISO 639-1 language code of the detected language, or -1 when
        the input is not a non-blank string or detection fails for any reason.
    """
    # Reject unusable input up front: slicing a non-string would raise before
    # the error handling below is reached, and blank text has nothing to detect.
    if not isinstance(text, str) or not text.strip():
        logging.error("Language detection error: no text provided")
        return -1

    # Trim the text to the first 100 characters
    trimmed_text = text[:100]

    try:
        # Detect the language using langdetect
        detected_lang_iso639_1 = detect(trimmed_text)
        # Lazy %-style arguments: the value is interpolated by logging itself
        logging.info("Detected ISO 639-1 code: %s", detected_lang_iso639_1)

        # Special case: map 'hr' (Croatian) to 'sr' (Serbian, ISO 639-1)
        if detected_lang_iso639_1 == "hr":
            return "sr"
        return detected_lang_iso639_1

    except LangDetectException as e:
        logging.error("Language detection error: %s", e)
    except Exception as e:
        # Best-effort helper: any other failure is logged and reported as -1
        logging.error("Unexpected error: %s", e)

    # Error sentinel shared by every failure path above
    return -1
|
|
||||||
|
|
||||||
|
|
||||||
def translate_text(
    text,
    source_language: str = "sr",
    target_language: str = "en",
    region_name="eu-west-1",
):
    """
    Translate text from source language to target language using AWS Translate.

    Failures are reported through the returned string, not raised: validation
    problems and AWS errors each yield a fixed, human-readable message.

    Parameters:
    text (str): The text to be translated.
    source_language (str): The source language code (e.g., 'sr' for Serbian).
        Passed unchanged as SourceLanguageCode; the defaults are two-letter codes.
        Code list: https://www.loc.gov/standards/iso639-2/php/code_list.php
    target_language (str): The target language code (e.g., 'en' for English).
        Passed unchanged as TargetLanguageCode; the defaults are two-letter codes.
        Code list: https://www.loc.gov/standards/iso639-2/php/code_list.php
    region_name (str): AWS region name.

    Returns:
    str: Translated text or an error message.
    """
    # Validate before creating a client so bad input never triggers an AWS call
    if not text:
        return "No text provided for translation."

    if not source_language or not target_language:
        return "Both source and target language codes are required."

    try:
        translate = boto3.client(
            service_name="translate", region_name=region_name, use_ssl=True
        )
        result = translate.translate_text(
            Text=text,
            SourceLanguageCode=source_language,
            TargetLanguageCode=target_language,
        )
        return result.get("TranslatedText", "No translation found.")

    except BotoCoreError as e:
        # Logged at ERROR level with lazy %-style arguments
        logging.error("BotoCoreError occurred: %s", e)
        return "Error with AWS Translate service configuration or request."

    except ClientError as e:
        logging.error("ClientError occurred: %s", e)
        return "Error with AWS client or network issue."
|
|
||||||
74
poetry.lock
generated
74
poetry.lock
generated
|
|
@ -2525,20 +2525,6 @@ test-functional = ["jupytext[test]"]
|
||||||
test-integration = ["ipykernel", "jupyter-server (!=2.11)", "jupytext[test-functional]", "nbconvert"]
|
test-integration = ["ipykernel", "jupyter-server (!=2.11)", "jupytext[test-functional]", "nbconvert"]
|
||||||
test-ui = ["calysto-bash"]
|
test-ui = ["calysto-bash"]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "langdetect"
|
|
||||||
version = "1.0.9"
|
|
||||||
description = "Language detection library ported from Google's language-detection."
|
|
||||||
optional = false
|
|
||||||
python-versions = "*"
|
|
||||||
files = [
|
|
||||||
{file = "langdetect-1.0.9-py2-none-any.whl", hash = "sha256:7cbc0746252f19e76f77c0b1690aadf01963be835ef0cd4b56dddf2a8f1dfc2a"},
|
|
||||||
{file = "langdetect-1.0.9.tar.gz", hash = "sha256:cbc1fef89f8d062739774bd51eda3da3274006b3661d199c2655f6b3f6d605a0"},
|
|
||||||
]
|
|
||||||
|
|
||||||
[package.dependencies]
|
|
||||||
six = "*"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "loguru"
|
name = "loguru"
|
||||||
version = "0.7.2"
|
version = "0.7.2"
|
||||||
|
|
@ -4247,64 +4233,6 @@ pyyaml = "*"
|
||||||
[package.extras]
|
[package.extras]
|
||||||
extra = ["pygments (>=2.12)"]
|
extra = ["pygments (>=2.12)"]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "pymupdf"
|
|
||||||
version = "1.24.0"
|
|
||||||
description = "A high performance Python library for data extraction, analysis, conversion & manipulation of PDF (and other) documents."
|
|
||||||
optional = false
|
|
||||||
python-versions = ">=3.8"
|
|
||||||
files = [
|
|
||||||
{file = "PyMuPDF-1.24.0-cp310-none-macosx_10_9_x86_64.whl", hash = "sha256:37160eb301e017ec67bb63b1c6f52eae2c90bd1159f6a6b2ec469c3e69d55f74"},
|
|
||||||
{file = "PyMuPDF-1.24.0-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:af2d8ba47851f2a5a2f7592453792a03cbcd705e40512e9aeb199edd7bcce886"},
|
|
||||||
{file = "PyMuPDF-1.24.0-cp310-none-manylinux2014_aarch64.whl", hash = "sha256:f318efcfda3ca625b2b2318019d8195b2e239cf1e66eaf5a94cd1e6bd11999d2"},
|
|
||||||
{file = "PyMuPDF-1.24.0-cp310-none-manylinux2014_x86_64.whl", hash = "sha256:986b234751e734da1b4f983fd270fa595258781abc25e26d409d96439136c41c"},
|
|
||||||
{file = "PyMuPDF-1.24.0-cp310-none-win32.whl", hash = "sha256:490d10c85defec873bf33a54eea1e8cc637927c7efeaff3570b812d7c65256f7"},
|
|
||||||
{file = "PyMuPDF-1.24.0-cp310-none-win_amd64.whl", hash = "sha256:2d46cd6535f25ffeb6261d389b932fa6359193a12de3633e200504898d48c27d"},
|
|
||||||
{file = "PyMuPDF-1.24.0-cp311-none-macosx_10_9_x86_64.whl", hash = "sha256:9354c2654512390d261bad37a90168de0cb954be4e9b3d55073a67e8ca07f7f8"},
|
|
||||||
{file = "PyMuPDF-1.24.0-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:bfc953361277cafa38e5bb93edd2b7c6c0c4284f137cea5847efe730759fe0d2"},
|
|
||||||
{file = "PyMuPDF-1.24.0-cp311-none-manylinux2014_aarch64.whl", hash = "sha256:13625c9da4021e649da11acb60e0a8aa300fb6c4bdb450754f975d7f92043999"},
|
|
||||||
{file = "PyMuPDF-1.24.0-cp311-none-manylinux2014_x86_64.whl", hash = "sha256:8db27eca7f6aa2c5aa84278cc9961a0183e8aca6d7210a5648658816ea9601bf"},
|
|
||||||
{file = "PyMuPDF-1.24.0-cp311-none-win32.whl", hash = "sha256:fc4b7a212b9f3216bb32c1146340efe5282c1519f7250e52ccd9dedcfd04df5d"},
|
|
||||||
{file = "PyMuPDF-1.24.0-cp311-none-win_amd64.whl", hash = "sha256:4e92d2895eb55b5475572bda167bb6d3c5b7757ba0b6beee0456ca0d3db852b2"},
|
|
||||||
{file = "PyMuPDF-1.24.0-cp312-none-macosx_10_9_x86_64.whl", hash = "sha256:963759f1a2722d25d08e79e00db696e4f5342675bed3b2f2129f03a8d4c41b77"},
|
|
||||||
{file = "PyMuPDF-1.24.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:96bcecd0a33b2de6954c4a3c677719cd1d1f36c1fe7dc4e229e06177aef8bdb7"},
|
|
||||||
{file = "PyMuPDF-1.24.0-cp312-none-manylinux2014_aarch64.whl", hash = "sha256:b9fb4df0d584b1df3789f521e3950a930884fe0fdd28d4c4ef1c571f3fb9b56e"},
|
|
||||||
{file = "PyMuPDF-1.24.0-cp312-none-manylinux2014_x86_64.whl", hash = "sha256:65fc88a23804b83b9390016d377d9350dece167e349140de93769618858ccf8d"},
|
|
||||||
{file = "PyMuPDF-1.24.0-cp312-none-win32.whl", hash = "sha256:4395b420477620be4fc90567deb20f17eda5e9757e2ca95f7bc3854d2a6713cc"},
|
|
||||||
{file = "PyMuPDF-1.24.0-cp312-none-win_amd64.whl", hash = "sha256:ee1188a8d9bf9dbf21aab8229c99472dd47af315a71753452210f40cff744a7b"},
|
|
||||||
{file = "PyMuPDF-1.24.0-cp38-none-macosx_10_9_x86_64.whl", hash = "sha256:82ff0a4ed3a27de95726db1f10744c2865212eed2a28e3fd19a081b9c247028d"},
|
|
||||||
{file = "PyMuPDF-1.24.0-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:9e9945d1af3ec6deff4c5d61edc63b9c68d49c2212df1104614e2ab173b1d158"},
|
|
||||||
{file = "PyMuPDF-1.24.0-cp38-none-manylinux2014_aarch64.whl", hash = "sha256:f120a23a0690be2e6d3ec195c308582930c75fbf3fb6cb6785252a01454fb0ef"},
|
|
||||||
{file = "PyMuPDF-1.24.0-cp38-none-manylinux2014_x86_64.whl", hash = "sha256:08bb534a046d7492ab7cf726ef9aa01a14791e53922ffc2a341fa617709434f2"},
|
|
||||||
{file = "PyMuPDF-1.24.0-cp38-none-win32.whl", hash = "sha256:f428210b2fc7e0094dbcd62acc15554cb3ee9778a3429bf2d04850cfbab227fb"},
|
|
||||||
{file = "PyMuPDF-1.24.0-cp38-none-win_amd64.whl", hash = "sha256:6731cc7ef76d972220bd1bb50d5b67720de2038312be23806045bcc5f9675951"},
|
|
||||||
{file = "PyMuPDF-1.24.0-cp39-none-macosx_10_9_x86_64.whl", hash = "sha256:de1aa7825f3333dfbff26e88f9cd37491a625b783b8b4780a14e5f70ab6d9853"},
|
|
||||||
{file = "PyMuPDF-1.24.0-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:160a3310f33fda1c0cfaed82d4e22a2aca960ebf5c6919982032727973e42830"},
|
|
||||||
{file = "PyMuPDF-1.24.0-cp39-none-manylinux2014_aarch64.whl", hash = "sha256:ce6f1f0b3ca8023bdbbc90fd2428b05db5c7c4b581d785072200082924f6c82f"},
|
|
||||||
{file = "PyMuPDF-1.24.0-cp39-none-manylinux2014_x86_64.whl", hash = "sha256:750908f95771fa0fcdbc690f6aae7e0031ff002c5ea343f12930e42da73e5c8b"},
|
|
||||||
{file = "PyMuPDF-1.24.0-cp39-none-win32.whl", hash = "sha256:d193319e3850f4025dc1e3c8a6a0b03683668353aacf660d434668be51e3e464"},
|
|
||||||
{file = "PyMuPDF-1.24.0-cp39-none-win_amd64.whl", hash = "sha256:e72b7ab4b2dfffe38ceed1e577ffaaa2e34117d87fc716b0238a6f2a12670fe4"},
|
|
||||||
{file = "PyMuPDF-1.24.0.tar.gz", hash = "sha256:b6811b09af1ddb93229066f7acf183f6aeeeec4bf9c2290ff81fbeebbc5a4f79"},
|
|
||||||
]
|
|
||||||
|
|
||||||
[package.dependencies]
|
|
||||||
PyMuPDFb = "1.24.0"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "pymupdfb"
|
|
||||||
version = "1.24.0"
|
|
||||||
description = "MuPDF shared libraries for PyMuPDF."
|
|
||||||
optional = false
|
|
||||||
python-versions = ">=3.8"
|
|
||||||
files = [
|
|
||||||
{file = "PyMuPDFb-1.24.0-py3-none-macosx_10_9_x86_64.whl", hash = "sha256:5af4e14171efd5e85b82ce2ae94caaebae9f4314103fc9af62be99537e21562e"},
|
|
||||||
{file = "PyMuPDFb-1.24.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:113e424b534a73a00dfaf2407beab3e9c35bfe406f77cfa66a43cf5f87bafef6"},
|
|
||||||
{file = "PyMuPDFb-1.24.0-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:347fff11c61e82538bdf6293cb4cfb41aa7b6ae14a4785efaaa81da949126424"},
|
|
||||||
{file = "PyMuPDFb-1.24.0-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:871e100637fd64c76356656ca4122f4d355906aa25173997959ccaf39413c8d4"},
|
|
||||||
{file = "PyMuPDFb-1.24.0-py3-none-win32.whl", hash = "sha256:051e043ada55ecf03cae28b9990ec53b975a69995a0f177caedc9b3bf85d2d22"},
|
|
||||||
{file = "PyMuPDFb-1.24.0-py3-none-win_amd64.whl", hash = "sha256:3e368ce2a8935881965343a7b87565b532a1787a3dc8f5580980dfb8b91d0c39"},
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "pypdf"
|
name = "pypdf"
|
||||||
version = "4.1.0"
|
version = "4.1.0"
|
||||||
|
|
@ -6194,4 +6122,4 @@ weaviate = ["weaviate-client"]
|
||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "2.0"
|
lock-version = "2.0"
|
||||||
python-versions = "~3.11"
|
python-versions = "~3.11"
|
||||||
content-hash = "df096228fdc234bf2b2d53231366997744a34c03ea294e86071f8fa4679b262e"
|
content-hash = "d556cc86b6310e0c6020d900d4f958e067fd235c40a1df9e3139ca80625c923c"
|
||||||
|
|
|
||||||
|
|
@ -29,12 +29,10 @@ asyncpg = "^0.28.0"
|
||||||
instructor = "^0.6.8"
|
instructor = "^0.6.8"
|
||||||
networkx = "^3.2.1"
|
networkx = "^3.2.1"
|
||||||
graphviz = "^0.20.1"
|
graphviz = "^0.20.1"
|
||||||
langdetect = "^1.0.9"
|
|
||||||
debugpy = "^1.8.0"
|
debugpy = "^1.8.0"
|
||||||
pyarrow = "^15.0.0"
|
pyarrow = "^15.0.0"
|
||||||
pylint = "^3.0.3"
|
pylint = "^3.0.3"
|
||||||
aiosqlite = "^0.20.0"
|
aiosqlite = "^0.20.0"
|
||||||
pymupdf = "^1.23.25"
|
|
||||||
pandas = "^2.2.0"
|
pandas = "^2.2.0"
|
||||||
greenlet = "^3.0.3"
|
greenlet = "^3.0.3"
|
||||||
ruff = "^0.2.2"
|
ruff = "^0.2.2"
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue