|
|
|
"""base translator class"""
|
|
|
|
|
|
|
|
__copyright__ = "Copyright (C) 2020 Nidhal Baccouri"
|
|
|
|
|
|
|
|
from abc import ABC, abstractmethod
|
|
|
|
from pathlib import Path
|
|
|
|
from typing import List, Optional, Union
|
|
|
|
|
|
|
|
from deep_translator.constants import GOOGLE_LANGUAGES_TO_CODES
|
|
|
|
from deep_translator.exceptions import (
|
|
|
|
InvalidSourceOrTargetLanguage,
|
|
|
|
LanguageNotSupportedException,
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
class BaseTranslator(ABC):
    """
    Abstract class that serves as a base translator for the concrete translators.

    It stores the source/target language pair (normalised to language codes),
    offers helpers to query the supported languages, and provides shared
    implementations for translating a file or a batch of texts on top of the
    abstract ``translate`` method.
    """

    def __init__(
        self,
        base_url: Optional[str] = None,
        languages: dict = GOOGLE_LANGUAGES_TO_CODES,
        source: str = "auto",
        target: str = "en",
        payload_key: Optional[str] = None,
        element_tag: Optional[str] = None,
        element_query: Optional[dict] = None,
        **url_params,
    ):
        """
        @param base_url: stored as ``_base_url`` for use by subclasses
        @param languages: dictionary mapping language names to their codes
        @param source: source language to translate from (name, code or "auto")
        @param target: target language to translate to (name, code or "auto")
        @param payload_key: stored as ``payload_key`` for use by subclasses
        @param element_tag: stored as ``_element_tag`` for use by subclasses
        @param element_query: stored as ``_element_query`` for use by subclasses
        @param url_params: extra keyword arguments, stored as ``_url_params``
        @raise InvalidSourceOrTargetLanguage: if source or target is empty
        @raise LanguageNotSupportedException: if source or target is neither a
            known language name, a known code nor "auto"
        """
        self._base_url = base_url
        self._languages = languages
        self._supported_languages = list(self._languages.keys())
        if not source:
            raise InvalidSourceOrTargetLanguage(source)
        if not target:
            raise InvalidSourceOrTargetLanguage(target)

        # Unpacking consumes the generator, so unsupported languages are
        # rejected here rather than at first use.
        self._source, self._target = self._map_language_to_code(source, target)
        self._url_params = url_params
        self._element_tag = element_tag
        self._element_query = element_query
        self.payload_key = payload_key
        super().__init__()

    @property
    def source(self):
        """the source language as stored on the instance"""
        return self._source

    @source.setter
    def source(self, lang):
        # NOTE: the value is stored as given; it is not mapped to a code here.
        self._source = lang

    @property
    def target(self):
        """the target language as stored on the instance"""
        return self._target

    @target.setter
    def target(self, lang):
        # NOTE: the value is stored as given; it is not mapped to a code here.
        self._target = lang

    def _type(self):
        """
        @return: the name of the concrete class of this instance
        """
        return self.__class__.__name__

    def _map_language_to_code(self, *languages):
        """
        map language to its corresponding code (abbreviation) if the language was
        passed by its full name by the user

        This is a generator: nothing is validated until it is iterated.
        @param languages: list of languages
        @return: mapped value of the language or raise an exception if the language
            is not supported
        @raise LanguageNotSupportedException: for a language that is neither a
            known name, a known code nor "auto"
        """
        for language in languages:
            if language in self._languages.values() or language == "auto":
                # already a code (or the special "auto" value): pass through
                yield language
            elif language in self._languages:
                yield self._languages[language]
            else:
                raise LanguageNotSupportedException(
                    language,
                    message=f"No support for the provided language.\n"
                    f"Please select on of the supported languages:\n"
                    f"{self._languages}",
                )

    def _same_source_target(self) -> bool:
        """
        @return: True if the stored source and target languages are equal
        """
        return self._source == self._target

    def get_supported_languages(
        self, as_dict: bool = False, **kwargs
    ) -> Union[list, dict]:
        """
        return the languages supported by this translator
        @param as_dict: if True, the languages will be returned as a dictionary
            mapping languages to their abbreviations
        @param kwargs: accepted for compatibility, not used here
        @return: list or dict
        """
        return self._languages if as_dict else self._supported_languages

    def is_language_supported(self, language: str, **kwargs) -> bool:
        """
        check if the language is supported by the translator
        @param language: a string for 1 language (name, code or "auto")
        @param kwargs: accepted for compatibility, not used here
        @return: bool
        """
        return (
            language == "auto"
            or language in self._languages
            or language in self._languages.values()
        )

    @abstractmethod
    def translate(self, text: str, **kwargs) -> str:
        """
        translate a text using a translator under the hood and return
        the translated text
        @param text: text to translate
        @param kwargs: additional arguments
        @return: str
        @raise NotImplementedError: when reached through ``super().translate``
        """
        # NotImplemented is a non-callable singleton meant for binary operators;
        # the exception to raise from an unimplemented method is
        # NotImplementedError.
        raise NotImplementedError("You need to implement the translate method!")

    def _read_docx(self, f: str):
        """
        read the text of a .docx file
        @param f: path to the .docx file
        @return: the value returned by docx2txt.process
        """
        # imported lazily so the dependency is only needed for .docx input
        import docx2txt

        return docx2txt.process(f)

    def _read_pdf(self, f: str):
        """
        read the text of the first page of a .pdf file
        @param f: path to the .pdf file
        @return: the extracted text of the first page
        """
        # imported lazily so the dependency is only needed for .pdf input
        import pypdf

        reader = pypdf.PdfReader(f)
        # NOTE: only the first page is read; further pages are ignored.
        page = reader.pages[0]
        return page.extract_text()

    def _translate_file(self, path: Union[str, Path], **kwargs) -> str:
        """
        translate directly from file

        Files ending in .docx or .pdf (case-insensitive) are read with the
        dedicated readers; any other file is read as UTF-8 text and stripped.
        @param path: path to the target file
        @type path: str or pathlib.Path
        @param kwargs: additional args, forwarded to ``translate``
        @return: str
        @raise FileNotFoundError: if the path does not exist
        """
        if not isinstance(path, Path):
            path = Path(path)

        if not path.exists():
            # Library code must not terminate the interpreter; let the caller
            # decide how to react to a missing file.
            raise FileNotFoundError(f"Path to the file is wrong! ({path})")

        ext = path.suffix.lower()

        if ext == ".docx":
            text = self._read_docx(f=str(path))
        elif ext == ".pdf":
            text = self._read_pdf(f=str(path))
        else:
            with open(path, "r", encoding="utf-8") as f:
                text = f.read().strip()

        return self.translate(text, **kwargs)

    def _translate_batch(self, batch: List[str], **kwargs) -> List[str]:
        """
        translate a list of texts
        @param batch: list of texts you want to translate
        @param kwargs: additional args, forwarded to ``translate`` for each text
        @return: list of translations, in the same order as the input
        @raise Exception: if the batch is empty or None
        """
        if not batch:
            raise Exception("Enter your text list that you want to translate")
        return [self.translate(text, **kwargs) for text in batch]
|