You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
bazarr/libs/deep_translator/base.py

184 lines
5.5 KiB

"""base translator class"""
__copyright__ = "Copyright (C) 2020 Nidhal Baccouri"
from abc import ABC, abstractmethod
from pathlib import Path
from typing import List, Optional, Union
from deep_translator.constants import GOOGLE_LANGUAGES_TO_CODES
from deep_translator.exceptions import (
InvalidSourceOrTargetLanguage,
LanguageNotSupportedException,
)
class BaseTranslator(ABC):
"""
Abstract class that serve as a base translator for other different translators
"""
def __init__(
self,
base_url: str = None,
languages: dict = GOOGLE_LANGUAGES_TO_CODES,
source: str = "auto",
target: str = "en",
payload_key: Optional[str] = None,
element_tag: Optional[str] = None,
element_query: Optional[dict] = None,
**url_params,
):
"""
@param source: source language to translate from
@param target: target language to translate to
"""
self._base_url = base_url
self._languages = languages
self._supported_languages = list(self._languages.keys())
if not source:
raise InvalidSourceOrTargetLanguage(source)
if not target:
raise InvalidSourceOrTargetLanguage(target)
self._source, self._target = self._map_language_to_code(source, target)
self._url_params = url_params
self._element_tag = element_tag
self._element_query = element_query
self.payload_key = payload_key
super().__init__()
@property
def source(self):
return self._source
@source.setter
def source(self, lang):
self._source = lang
@property
def target(self):
return self._target
@target.setter
def target(self, lang):
self._target = lang
def _type(self):
return self.__class__.__name__
def _map_language_to_code(self, *languages):
"""
map language to its corresponding code (abbreviation) if the language was passed
by its full name by the user
@param languages: list of languages
@return: mapped value of the language or raise an exception if the language is
not supported
"""
for language in languages:
if language in self._languages.values() or language == "auto":
yield language
elif language in self._languages.keys():
yield self._languages[language]
else:
raise LanguageNotSupportedException(
language,
message=f"No support for the provided language.\n"
f"Please select on of the supported languages:\n"
f"{self._languages}",
)
def _same_source_target(self) -> bool:
return self._source == self._target
def get_supported_languages(
self, as_dict: bool = False, **kwargs
) -> Union[list, dict]:
"""
return the supported languages by the Google translator
@param as_dict: if True, the languages will be returned as a dictionary
mapping languages to their abbreviations
@return: list or dict
"""
return self._supported_languages if not as_dict else self._languages
def is_language_supported(self, language: str, **kwargs) -> bool:
"""
check if the language is supported by the translator
@param language: a string for 1 language
@return: bool or raise an Exception
"""
if (
language == "auto"
or language in self._languages.keys()
or language in self._languages.values()
):
return True
else:
return False
@abstractmethod
def translate(self, text: str, **kwargs) -> str:
"""
translate a text using a translator under the hood and return
the translated text
@param text: text to translate
@param kwargs: additional arguments
@return: str
"""
return NotImplemented("You need to implement the translate method!")
def _read_docx(self, f: str):
import docx2txt
return docx2txt.process(f)
def _read_pdf(self, f: str):
import pypdf
reader = pypdf.PdfReader(f)
page = reader.pages[0]
return page.extract_text()
def _translate_file(self, path: str, **kwargs) -> str:
"""
translate directly from file
@param path: path to the target file
@type path: str
@param kwargs: additional args
@return: str
"""
if not isinstance(path, Path):
path = Path(path)
if not path.exists():
print("Path to the file is wrong!")
exit(1)
ext = path.suffix
if ext == ".docx":
text = self._read_docx(f=str(path))
elif ext == ".pdf":
text = self._read_pdf(f=str(path))
else:
with open(path, "r", encoding="utf-8") as f:
text = f.read().strip()
return self.translate(text)
def _translate_batch(self, batch: List[str], **kwargs) -> List[str]:
"""
translate a list of texts
@param batch: list of texts you want to translate
@return: list of translations
"""
if not batch:
raise Exception("Enter your text list that you want to translate")
arr = []
for i, text in enumerate(batch):
translated = self.translate(text, **kwargs)
arr.append(translated)
return arr