parent
6e3f4bf804
commit
49c6e8b3fb
@ -0,0 +1,25 @@
|
||||
"""Top-level package for deep_translator."""
|
||||
|
||||
from .google_trans import GoogleTranslator
|
||||
from .pons import PonsTranslator
|
||||
from .linguee import LingueeTranslator
|
||||
from .mymemory import MyMemoryTranslator
|
||||
from .yandex import YandexTranslator
|
||||
from .qcri import QCRI
|
||||
from .deepl import DeepL
|
||||
from .detection import single_detection, batch_detection
|
||||
|
||||
|
||||
# Package metadata.
__author__ = """Nidhal Baccouri"""
__email__ = 'nidhalbacc@gmail.com'
__version__ = '1.3.2'

# Public API of the package.
# ``__all__`` has to hold the *names* of the exported objects as strings;
# listing the objects themselves makes ``from deep_translator import *``
# fail with a TypeError.
__all__ = [
    "GoogleTranslator",
    "PonsTranslator",
    "LingueeTranslator",
    "MyMemoryTranslator",
    "YandexTranslator",
    "QCRI",
    "DeepL",
    "single_detection",
    "batch_detection",
]
|
@ -0,0 +1,52 @@
|
||||
"""Console script for deep_translator."""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
from .google_trans import GoogleTranslator
|
||||
from .mymemory import MyMemoryTranslator
|
||||
from .pons import PonsTranslator
|
||||
from .linguee import LingueeTranslator
|
||||
|
||||
|
||||
def translate(args):
    """
    function used to provide translations from the parsed terminal arguments

    The translator class is chosen from ``args.translator``. If that name is
    not one of the supported translators, a hint is printed and the function
    returns without attempting a translation.

    @param args: parsed terminal arguments; the attributes ``translator``,
                 ``source``, ``target`` and ``text`` are read
    @return: None
    """
    if args.translator == 'google':
        translator = GoogleTranslator(source=args.source, target=args.target)
    elif args.translator == 'pons':
        translator = PonsTranslator(source=args.source, target=args.target)
    elif args.translator == 'linguee':
        translator = LingueeTranslator(source=args.source, target=args.target)
    elif args.translator == 'mymemory':
        translator = MyMemoryTranslator(source=args.source, target=args.target)
    else:
        print("given translator is not supported. Please use a supported translator from the deep_translator tool")
        # Stop here: there is no translator object to work with. Falling
        # through used to crash with AttributeError on ``None.translate``.
        return

    res = translator.translate(args.text)
    print(" | Translation from {} to {} |".format(args.source, args.target))
    print("Translated text: \n {}".format(res))
|
||||
|
||||
|
||||
def main():
    """
    parse the terminal arguments and hand them over to the translate function
    """
    cli_parser = argparse.ArgumentParser()
    cli_parser.add_argument('--translator', '-trans', default='google', type=str,
                            help="name of the translator you want to use")

    # the three mandatory options share the same shape, so they are declared as data
    required_options = (
        ('--source', '-src', "source language to translate from"),
        ('--target', '-tg', "target language to translate to"),
        ('--text', '-txt', "text you want to translate"),
    )
    for long_flag, short_flag, description in required_options:
        cli_parser.add_argument(long_flag, short_flag, type=str, help=description, required=True)

    translate(cli_parser.parse_args())


if __name__ == "__main__":
    main()
|
@ -0,0 +1,11 @@
|
||||
"""
|
||||
configuration object that holds data about the language detection api
|
||||
"""
|
||||
|
||||
# Settings for the language detection web service.
# The 'Authorization' value is a template: '{}' is the slot for the API key.
config = dict(
    url='https://ws.detectlanguage.com/0.2/detect',
    headers={
        'User-Agent': 'Detect Language API Python Client 1.4.0',
        'Authorization': 'Bearer {}',
    },
)
|
@ -0,0 +1,183 @@
|
||||
|
||||
|
||||
# Base url of every supported translation service, keyed by service name.
# Entries containing '{version}' / '{endpoint}' are templates to be filled
# with str.format before use.
BASE_URLS = dict(
    GOOGLE_TRANSLATE="https://translate.google.com/m",
    PONS="https://en.pons.com/translate/",
    YANDEX="https://translate.yandex.net/api/{version}/tr.json/{endpoint}",
    LINGUEE="https://www.linguee.com/",
    MYMEMORY="http://api.mymemory.translated.net/get",
    QCRI="https://mt.qcri.org/api/v1/{endpoint}?",
    DEEPL="https://api.deepl.com/{version}/",
)
|
||||
|
||||
# Language code -> language name table for the google translator.
# NOTE: the last two names ("Filipino", "Hebrew") are capitalized, unlike the
# rest, so they stay distinct from "filipino"/"hebrew" in the inverted table.
GOOGLE_CODES_TO_LANGUAGES = {
    "af": "afrikaans",
    "sq": "albanian",
    "am": "amharic",
    "ar": "arabic",
    "hy": "armenian",
    "az": "azerbaijani",
    "eu": "basque",
    "be": "belarusian",
    "bn": "bengali",
    "bs": "bosnian",
    "bg": "bulgarian",
    "ca": "catalan",
    "ceb": "cebuano",
    "ny": "chichewa",
    "zh-cn": "chinese (simplified)",
    "zh-tw": "chinese (traditional)",
    "co": "corsican",
    "hr": "croatian",
    "cs": "czech",
    "da": "danish",
    "nl": "dutch",
    "en": "english",
    "eo": "esperanto",
    "et": "estonian",
    "tl": "filipino",
    "fi": "finnish",
    "fr": "french",
    "fy": "frisian",
    "gl": "galician",
    "ka": "georgian",
    "de": "german",
    "el": "greek",
    "gu": "gujarati",
    "ht": "haitian creole",
    "ha": "hausa",
    "haw": "hawaiian",
    "iw": "hebrew",
    "hi": "hindi",
    "hmn": "hmong",
    "hu": "hungarian",
    "is": "icelandic",
    "ig": "igbo",
    "id": "indonesian",
    "ga": "irish",
    "it": "italian",
    "ja": "japanese",
    "jw": "javanese",
    "kn": "kannada",
    "kk": "kazakh",
    "km": "khmer",
    "ko": "korean",
    "ku": "kurdish (kurmanji)",
    "ky": "kyrgyz",
    "lo": "lao",
    "la": "latin",
    "lv": "latvian",
    "lt": "lithuanian",
    "lb": "luxembourgish",
    "mk": "macedonian",
    "mg": "malagasy",
    "ms": "malay",
    "ml": "malayalam",
    "mt": "maltese",
    "mi": "maori",
    "mr": "marathi",
    "mn": "mongolian",
    "my": "myanmar (burmese)",
    "ne": "nepali",
    "no": "norwegian",
    "ps": "pashto",
    "fa": "persian",
    "pl": "polish",
    "pt": "portuguese",
    "pa": "punjabi",
    "ro": "romanian",
    "ru": "russian",
    "sm": "samoan",
    "gd": "scots gaelic",
    "sr": "serbian",
    "st": "sesotho",
    "sn": "shona",
    "sd": "sindhi",
    "si": "sinhala",
    "sk": "slovak",
    "sl": "slovenian",
    "so": "somali",
    "es": "spanish",
    "su": "sundanese",
    "sw": "swahili",
    "sv": "swedish",
    "tg": "tajik",
    "ta": "tamil",
    "te": "telugu",
    "th": "thai",
    "tr": "turkish",
    "uk": "ukrainian",
    "ur": "urdu",
    "uz": "uzbek",
    "vi": "vietnamese",
    "cy": "welsh",
    "xh": "xhosa",
    "yi": "yiddish",
    "yo": "yoruba",
    "zu": "zulu",
    "fil": "Filipino",
    "he": "Hebrew",
}

# Inverted table: language name -> language code.
GOOGLE_LANGUAGES_TO_CODES = {
    name: code for code, name in GOOGLE_CODES_TO_LANGUAGES.items()
}
|
||||
|
||||
# Language code -> language name table for the pons translator.
PONS_CODES_TO_LANGUAGES = {
    "ar": "arabic",
    "bg": "bulgarian",
    "zh-cn": "chinese",
    "cs": "czech",
    "da": "danish",
    "nl": "dutch",
    "en": "english",
    "fr": "french",
    "de": "german",
    "el": "greek",
    "hu": "hungarian",
    "it": "italian",
    "la": "latin",
    "no": "norwegian",
    "pl": "polish",
    "pt": "portuguese",
    "ru": "russian",
    "sl": "slovenian",
    "es": "spanish",
    "sv": "swedish",
    "tr": "turkish",
    "elv": "elvish",
}

# Inverted table: language name -> language code.
PONS_LANGUAGES_TO_CODES = {
    name: code for code, name in PONS_CODES_TO_LANGUAGES.items()
}
|
||||
|
||||
# Language name -> language code table for the linguee translator.
# The serbian ("sr") and croatian ("hr") entries are intentionally left
# commented out, as in the original table.
LINGUEE_LANGUAGES_TO_CODES = {
    'maltese': 'mt',
    'english': 'en',
    'german': 'de',
    'bulgarian': 'bg',
    'polish': 'pl',
    'portuguese': 'pt',
    'hungarian': 'hu',
    'romanian': 'ro',
    'russian': 'ru',
    # 'serbian': 'sr',
    'dutch': 'nl',
    'slovakian': 'sk',
    'greek': 'el',
    'slovenian': 'sl',
    'danish': 'da',
    'italian': 'it',
    'spanish': 'es',
    'finnish': 'fi',
    'chinese': 'zh',
    'french': 'fr',
    # 'croatian': 'hr',
    'czech': 'cs',
    'laotian': 'lo',
    'swedish': 'sv',
    'latvian': 'lv',
    'estonian': 'et',
    'japanese': 'ja',
}

# Inverted table: language code -> language name.
LINGUEE_CODE_TO_LANGUAGE = {
    code: name for name, code in LINGUEE_LANGUAGES_TO_CODES.items()
}
|
||||
|
||||
# "72e9e2cc7c992db4dcbdd6fb9f91a0d1"
|
@ -0,0 +1,59 @@
|
||||
|
||||
import requests
|
||||
from requests.utils import requote_uri
|
||||
from deep_translator.constants import BASE_URLS
|
||||
from deep_translator.exceptions import (RequestError,
|
||||
ServerException, TranslationNotFound, TooManyRequests)
|
||||
|
||||
|
||||
class DeepL(object):
    """
    class that wraps functions, which use the DeepL translator under the hood to translate word(s)
    """

    def __init__(self, api_key=None):
        """
        @param api_key: your DeepL api key. Get one here: https://www.deepl.com/docs-api/accessing-the-api/
        @raise ServerException: (status 401) if no api key is provided
        """
        if not api_key:
            raise ServerException(401)
        self.version = 'v2'
        self.api_key = api_key
        self.__base_url = BASE_URLS.get("DEEPL").format(version=self.version)

    def translate(self, source, target, text):
        """
        translate a single text with the DeepL API

        @param source: source language
        @param target: target language
        @param text: text to translate
        @return: the decoded JSON body of the API response
        @raise ServerException: if the server cannot be reached (503) or answers
                                with a status code other than 200
        @raise TranslationNotFound: if the response body is empty
        """
        params = {
            "auth_key": self.api_key,
            "target_lang": target,
            "source_lang": source,
            "text": text
        }
        try:
            # the base url only ends with the API version; 'translate' is the
            # endpoint that performs the translation
            response = requests.get(self.__base_url + 'translate', params=params)
        except requests.exceptions.ConnectionError:
            # requests raises its own ConnectionError, which is NOT a subclass
            # of the builtin one, so the builtin name would never match here
            raise ServerException(503)

        if response.status_code != 200:
            # the exception used to be constructed but never raised, so error
            # responses silently produced a None result
            raise ServerException(response.status_code)

        res = response.json()
        if not res:
            raise TranslationNotFound(text)
        return res

    def translate_batch(self, source, target, batch):
        """
        translate a batch of texts
        @param source: source language
        @param target: target language
        @param batch: list of texts to translate
        @return: list of translations
        """
        return [self.translate(source, target, text) for text in batch]


if __name__ == '__main__':
    d = DeepL(api_key="key")
    print(d)
|
@ -0,0 +1,76 @@
|
||||
"""
|
||||
language detection API
|
||||
"""
|
||||
import requests
|
||||
from deep_translator.configs import config
|
||||
from requests.exceptions import HTTPError
|
||||
|
||||
|
||||
def get_request_body(text, api_key, *args):
    """
    send a request and return the response body parsed as dictionary

    @param text: target text that you want to detect its language
    @type text: str
    @type api_key: str
    @param api_key: your private API key
    @param args: accepted for backward compatibility, not used
    @return: the value stored under the 'data' key of the JSON response
    @raise Exception: if the api key or the text is missing
    @raise HTTPError: if the server answers with an error status
    """
    if not api_key:
        raise Exception("you need to get an API_KEY for this to work. "
                        "Get one for free here: https://detectlanguage.com/documentation")
    if not text:
        raise Exception("Please provide an input text")

    # Work on a copy of the shared header template. Formatting the template in
    # place replaced the '{}' placeholder for good, so every later call kept
    # sending the api key of the very first call.
    headers = dict(config['headers'])
    headers['Authorization'] = headers['Authorization'].format(api_key)

    try:
        response = requests.post(config['url'],
                                 json={'q': text},
                                 headers=headers)
        # without this call requests never raises HTTPError for 4xx/5xx
        # answers and the handler below would be dead code
        response.raise_for_status()
    except HTTPError as e:
        print("Error occured while requesting from server: ", e.args)
        raise

    return response.json().get('data')
|
||||
|
||||
|
||||
def single_detection(text, api_key=None, detailed=False, *args, **kwargs):
    """
    detect the language of a single text

    Only the first detection reported by the server is considered.

    @param text: target text that you want to detect its language
    @type text: str
    @param api_key: your private API key
    @type api_key: str
    @param detailed: set to True to get the whole first detection entry
                     instead of only its language
    @return: the first detection entry if detailed is True, otherwise its
             'language' value, or None when that value is missing or empty
    """
    first_hit = get_request_body(text, api_key).get('detections')[0]
    if detailed:
        return first_hit

    language = first_hit.get('language', None)
    return language if language else None
|
||||
|
||||
|
||||
def batch_detection(text_list, api_key, detailed=False, *args):
    """
    detect the language of every text in a batch

    For each text only the first detection reported by the server is kept.

    @param text_list: target batch that you want to detect its language
    @param api_key: your private API key
    @param detailed: set to True to get the whole first detection entry of
                     each text instead of only its language
    @return: list with one entry per text
    """
    response_body = get_request_body(text_list, api_key)
    top_hits = [candidates[0] for candidates in response_body.get('detections')]
    if not detailed:
        return [hit['language'] for hit in top_hits]
    return top_hits
|
||||
|
@ -0,0 +1,113 @@
|
||||
class BaseError(Exception):
    """
    base error structure class
    """

    def __init__(self, val, message):
        """
        @param val: actual value
        @param message: message shown to the user
        """
        self.val = val
        self.message = message
        # hand the details to Exception as well, so that ``args`` (and with it
        # the repr of the error) is not empty
        super().__init__(val, message)

    def __str__(self):
        return "{} --> {}".format(self.val, self.message)
|
||||
|
||||
|
||||
class LanguageNotSupportedException(BaseError):
    """
    raised when a language is requested that the deep_translator does not support
    """

    def __init__(self, val, message="There is no support for the chosen language"):
        """
        @param val: the language that was rejected
        @param message: message shown to the user
        """
        super().__init__(val=val, message=message)
|
||||
|
||||
|
||||
class NotValidPayload(BaseError):
    """
    raised when the payload entered by the user is invalid
    """

    def __init__(
            self,
            val,
            message='text must be a valid text with maximum 5000 character, otherwise it cannot be translated'):
        """
        @param val: the payload that was rejected
        @param message: message shown to the user
        """
        super().__init__(val, message)
|
||||
|
||||
|
||||
class TranslationNotFound(BaseError):
    """
    raised when no translation was found for the text provided by the user
    """

    def __init__(
            self,
            val,
            message='No translation was found using the current translator. Try another translator?'):
        """
        @param val: the text for which no translation was found
        @param message: message shown to the user
        """
        super().__init__(val, message)
|
||||
|
||||
|
||||
class ElementNotFoundInGetRequest(BaseError):
    """
    raised when the html element was not found in the body parsed by beautifulsoup
    """

    def __init__(
            self,
            val,
            message='Required element was not found in the API response'):
        """
        @param val: actual value
        @param message: message shown to the user
        """
        super().__init__(val, message)
|
||||
|
||||
|
||||
class NotValidLength(BaseError):
    """
    raised when the length of the provided text is outside the limits of the translator
    """

    def __init__(self, val, min_chars, max_chars):
        """
        @param val: the text that was rejected
        @param min_chars: lower length bound shown in the message
        @param max_chars: upper length bound shown in the message
        """
        bounds = (min_chars, max_chars)
        super().__init__(val, "Text length need to be between {} and {} characters".format(*bounds))
|
||||
|
||||
|
||||
class RequestError(Exception):
    """
    exception thrown if an error occurred during the request call, e.g a connection problem.
    """

    def __init__(self, message="Request exception can happen due to an api connection error. "
                               "Please check your connection and try again"):
        """
        @param message: message shown to the user
        """
        self.message = message
        # initialise the Exception base class too, so ``args`` carries the message
        super().__init__(message)

    def __str__(self):
        return self.message
|
||||
|
||||
|
||||
class TooManyRequests(Exception):
    """
    exception thrown if the server rejected the request because too many requests were made
    """

    def __init__(self, message="Server Error: You made too many requests to the server. "
                               "According to google, you are allowed to make 5 requests per second "
                               "and up to 200k requests per day. You can wait and try again later "
                               "or you can try the translate_batch function"):
        """
        @param message: message shown to the user
        """
        self.message = message
        # initialise the Exception base class too, so ``args`` carries the message
        super().__init__(message)

    def __str__(self):
        return self.message
|
||||
|
||||
|
||||
class ServerException(Exception):
    """
    exception whose message is the symbolic error name that belongs to a status code
    """

    # status code -> symbolic error name
    errors = dict([
        (401, "ERR_KEY_INVALID"),
        (402, "ERR_KEY_BLOCKED"),
        (403, "ERR_DAILY_REQ_LIMIT_EXCEEDED"),
        (404, "ERR_DAILY_CHAR_LIMIT_EXCEEDED"),
        (413, "ERR_TEXT_TOO_LONG"),
        (422, "ERR_UNPROCESSABLE_TEXT"),
        (501, "ERR_LANG_NOT_SUPPORTED"),
        (503, "ERR_SERVICE_NOT_AVAIBLE"),
    ])

    def __init__(self, status_code, *args):
        """
        @param status_code: status code to look up; codes without an entry in
                            ``errors`` get the generic "API server error" text
        @param args: additional values appended to the exception arguments
        """
        if status_code in self.errors:
            error_name = self.errors[status_code]
        else:
            error_name = "API server error"
        super().__init__(error_name, *args)
|
@ -0,0 +1,173 @@
|
||||
"""
|
||||
google translator API
|
||||
"""
|
||||
|
||||
from deep_translator.constants import BASE_URLS, GOOGLE_LANGUAGES_TO_CODES
|
||||
from deep_translator.exceptions import TooManyRequests, LanguageNotSupportedException, TranslationNotFound, NotValidPayload, RequestError
|
||||
from deep_translator.parent import BaseTranslator
|
||||
from bs4 import BeautifulSoup
|
||||
import requests
|
||||
from time import sleep
|
||||
import warnings
|
||||
import logging
|
||||
|
||||
|
||||
class GoogleTranslator(BaseTranslator):
    """
    class that wraps functions, which use google translate under the hood to translate text(s)
    """
    _languages = GOOGLE_LANGUAGES_TO_CODES
    supported_languages = list(_languages.keys())

    def __init__(self, source="auto", target="en"):
        """
        @param source: source language to translate from
        @param target: target language to translate to
        @raise LanguageNotSupportedException: if one of the languages is not supported
        """
        self.__base_url = BASE_URLS.get("GOOGLE_TRANSLATE")

        # is_language_supported either returns True or raises
        if self.is_language_supported(source, target):
            self._source, self._target = self._map_language_to_code(source.lower(), target.lower())

        super(GoogleTranslator, self).__init__(base_url=self.__base_url,
                                               source=self._source,
                                               target=self._target,
                                               element_tag='div',
                                               element_query={"class": "t0"},
                                               payload_key='q',  # key of text in the url
                                               hl=self._target,
                                               sl=self._source)

        # fallback query, tried when the primary element is not in the page
        self._alt_element_query = {"class": "result-container"}

    @staticmethod
    def get_supported_languages(as_dict=False):
        """
        return the supported languages by the google translator
        @param as_dict: if True, the languages will be returned as a dictionary mapping languages to their abbreviations
        @return: list or dict
        """
        return GoogleTranslator.supported_languages if not as_dict else GoogleTranslator._languages

    def _map_language_to_code(self, *languages):
        """
        map language to its corresponding code (abbreviation) if the language was passed by its full name by the user
        @param languages: list of languages
        @return: mapped value of the language or raise an exception if the language is not supported
        """
        for language in languages:
            if language in self._languages.values() or language == 'auto':
                yield language
            elif language in self._languages.keys():
                yield self._languages[language]
            else:
                raise LanguageNotSupportedException(language)

    def is_language_supported(self, *languages):
        """
        check if the language is supported by the translator

        The check ignores the case of the given languages, because they are
        lowercased before being mapped to their codes anyway.

        @param languages: list of languages
        @return: bool or raise an Exception
        """
        for lang in languages:
            candidate = lang.lower() if isinstance(lang, str) else lang
            if candidate == 'auto':
                continue
            if candidate not in self._languages.keys() and candidate not in self._languages.values():
                raise LanguageNotSupportedException(lang)
        return True

    def translate(self, text, **kwargs):
        """
        function that uses google translate to translate a text
        @param text: desired text to translate
        @return: str: translated text
        @raise TooManyRequests: if the server answers with status 429
        @raise RequestError: if the server answers with any other status than 200
        @raise TranslationNotFound: if no result element is found in the returned page
        """
        if not self._validate_payload(text):
            return None

        text = text.strip()
        if self.payload_key:
            self._url_params[self.payload_key] = text

        response = requests.get(self.__base_url,
                                params=self._url_params,
                                headers={'User-agent': 'your bot 0.1'})

        if response.status_code == 429:
            raise TooManyRequests()
        if response.status_code != 200:
            raise RequestError()

        soup = BeautifulSoup(response.text, 'html.parser')
        element = soup.find(self._element_tag, self._element_query)
        if not element:
            # try the alternative markup before giving up
            element = soup.find(self._element_tag, self._alt_element_query)
            if not element:
                raise TranslationNotFound(text)

        return element.get_text(strip=True)

    def translate_file(self, path, **kwargs):
        """
        translate directly from file
        @param path: path to the target file
        @type path: str
        @param kwargs: additional args
        @return: str
        """
        with open(path) as f:
            text = f.read()
        return self.translate(text=text)

    def translate_sentences(self, sentences=None, **kwargs):
        """
        translate many sentences together. This makes sense if you have sentences with different languages
        and you want to translate all to unified language. This is handy because it detects
        automatically the language of each sentence and then translate it.

        deprecated: use translate_batch instead

        @param sentences: list of sentences to translate
        @return: list of all translated sentences
        @raise NotValidPayload: if no sentences are given
        """
        warnings.warn("deprecated. Use the translate_batch function instead", DeprecationWarning, stacklevel=2)
        logging.warning("deprecated. Use the translate_batch function instead")
        if not sentences:
            raise NotValidPayload(sentences)

        return [self.translate(text=sentence) for sentence in sentences]

    def translate_batch(self, batch=None):
        """
        translate a list of texts
        @param batch: list of texts you want to translate
        @return: list of translations
        """
        if not batch:
            raise Exception("Enter your text list that you want to translate")

        arr = []
        for position, text in enumerate(batch):
            if position:
                # pause between two consecutive requests only; sleeping after
                # the last text just delayed the result
                sleep(2)
            arr.append(self.translate(text))

        return arr
|
||||
|
||||
|
||||
# if __name__ == '__main__':
|
||||
# for _ in range(10):
|
||||
# txt = GoogleTranslator(source="en", target="ar").translate("Hello how are you")
|
||||
# print("text: ", txt)
|
@ -0,0 +1,130 @@
|
||||
"""
|
||||
linguee translator API
|
||||
"""
|
||||
|
||||
from deep_translator.constants import BASE_URLS, LINGUEE_LANGUAGES_TO_CODES, LINGUEE_CODE_TO_LANGUAGE
|
||||
from deep_translator.exceptions import (LanguageNotSupportedException,
|
||||
TranslationNotFound,
|
||||
NotValidPayload,
|
||||
ElementNotFoundInGetRequest,
|
||||
RequestError,
|
||||
TooManyRequests)
|
||||
from deep_translator.parent import BaseTranslator
|
||||
from bs4 import BeautifulSoup
|
||||
import requests
|
||||
from requests.utils import requote_uri
|
||||
|
||||
|
||||
class LingueeTranslator(BaseTranslator):
    """
    class that wraps functions, which use the linguee translator under the hood to translate word(s)
    """
    _languages = LINGUEE_LANGUAGES_TO_CODES
    supported_languages = list(_languages.keys())

    def __init__(self, source, target="en"):
        """
        @param source: source language to translate from
        @param target: target language to translate to
        @raise LanguageNotSupportedException: if one of the languages is not supported
        """
        self.__base_url = BASE_URLS.get("LINGUEE")

        # is_language_supported either returns True or raises
        if self.is_language_supported(source, target):
            self._source, self._target = self._map_language_to_code(source.lower(), target.lower())

        super().__init__(base_url=self.__base_url,
                         source=self._source,
                         target=self._target,
                         element_tag='a',
                         element_query={'class': 'dictLink featured'},
                         payload_key=None,  # key of text in the url
                         )

    @staticmethod
    def get_supported_languages(as_dict=False):
        """
        return the supported languages by the linguee translator
        @param as_dict: if True, the languages will be returned as a dictionary mapping languages to their abbreviations
        @return: list or dict
        """
        return LingueeTranslator.supported_languages if not as_dict else LingueeTranslator._languages

    def _map_language_to_code(self, *languages, **kwargs):
        """
        map every language to its full name: an abbreviation is replaced by the
        corresponding language name, a full name is passed through unchanged
        @param languages: list of languages
        @return: mapped value of the language or raise an exception if the language is not supported
        """
        for language in languages:
            if language in self._languages.values():
                yield LINGUEE_CODE_TO_LANGUAGE[language]
            elif language in self._languages.keys():
                yield language
            else:
                raise LanguageNotSupportedException(language)

    def is_language_supported(self, *languages, **kwargs):
        """
        check if the language is supported by the translator

        The check ignores the case of the given languages, because they are
        lowercased before being mapped anyway.

        @param languages: list of languages
        @return: bool or raise an Exception
        """
        for lang in languages:
            candidate = lang.lower() if isinstance(lang, str) else lang
            if candidate not in self._languages.keys() and candidate not in self._languages.values():
                raise LanguageNotSupportedException(lang)
        return True

    def translate(self, word, return_all=False, **kwargs):
        """
        function that uses linguee to translate a word
        @param word: word to translate
        @type word: str
        @param return_all: set to True to return all synonym of the translated word
        @type return_all: bool
        @return: str: translated word
        @raise TooManyRequests: if the server answers with status 429
        @raise RequestError: if the server answers with any other status than 200
        @raise ElementNotFoundInGetRequest: if the returned page holds no result element
        """
        if not self._validate_payload(word, max_chars=50):
            return None

        # %s-%s/translation/%s.html
        url = "{}{}-{}/translation/{}.html".format(self.__base_url, self._source, self._target, word)
        url = requote_uri(url)
        response = requests.get(url)

        if response.status_code == 429:
            raise TooManyRequests()
        if response.status_code != 200:
            raise RequestError()

        soup = BeautifulSoup(response.text, 'html.parser')
        elements = soup.find_all(self._element_tag, self._element_query)
        if not elements:
            raise ElementNotFoundInGetRequest(elements)

        filtered_elements = []
        for el in elements:
            try:
                pronoun = el.find('span', {'class': 'placeholder'}).get_text(strip=True)
            except AttributeError:
                # no placeholder span inside this element
                pronoun = ''
            # drop the placeholder text from the text of the element
            filtered_elements.append(el.get_text(strip=True).replace(pronoun, ''))

        if not filtered_elements:
            raise TranslationNotFound(word)

        return filtered_elements if return_all else filtered_elements[0]

    def translate_words(self, words, **kwargs):
        """
        translate a batch of words together by providing them in a list
        @param words: list of words you want to translate
        @param kwargs: additional args, forwarded to translate
        @return: list of translated words
        @raise NotValidPayload: if no words are given
        """
        if not words:
            raise NotValidPayload(words)

        # translate() names its parameter ``word``; the former ``payload=``
        # keyword made every call fail with a TypeError
        return [self.translate(word=word, **kwargs) for word in words]
|
||||
|
@ -0,0 +1,174 @@
|
||||
"""
|
||||
mymemory translator API
|
||||
"""
|
||||
import logging
|
||||
import warnings
|
||||
|
||||
from deep_translator.constants import BASE_URLS, GOOGLE_LANGUAGES_TO_CODES
|
||||
from deep_translator.exceptions import (NotValidPayload,
|
||||
TranslationNotFound,
|
||||
LanguageNotSupportedException,
|
||||
RequestError,
|
||||
TooManyRequests)
|
||||
from deep_translator.parent import BaseTranslator
|
||||
import requests
|
||||
from time import sleep
|
||||
|
||||
|
||||
class MyMemoryTranslator(BaseTranslator):
|
||||
"""
|
||||
class that uses the mymemory translator to translate texts
|
||||
"""
|
||||
_languages = GOOGLE_LANGUAGES_TO_CODES
|
||||
supported_languages = list(_languages.keys())
|
||||
|
||||
def __init__(self, source="auto", target="en", **kwargs):
|
||||
"""
|
||||
@param source: source language to translate from
|
||||
@param target: target language to translate to
|
||||
"""
|
||||
self.__base_url = BASE_URLS.get("MYMEMORY")
|
||||
if self.is_language_supported(source, target):
|
||||
self._source, self._target = self._map_language_to_code(source.lower(), target.lower())
|
||||
self._source = self._source if self._source != 'auto' else 'Lao'
|
||||
|
||||
self.email = kwargs.get('email', None)
|
||||
super(MyMemoryTranslator, self).__init__(base_url=self.__base_url,
|
||||
source=self._source,
|
||||
target=self._target,
|
||||
payload_key='q',
|
||||
langpair='{}|{}'.format(self._source, self._target))
|
||||
|
||||
@staticmethod
|
||||
def get_supported_languages(as_dict=False):
|
||||
"""
|
||||
return the supported languages by the mymemory translator
|
||||
@param as_dict: if True, the languages will be returned as a dictionary mapping languages to their abbreviations
|
||||
@return: list or dict
|
||||
"""
|
||||
return MyMemoryTranslator.supported_languages if not as_dict else MyMemoryTranslator._languages
|
||||
|
||||
def _map_language_to_code(self, *languages):
|
||||
"""
|
||||
map language to its corresponding code (abbreviation) if the language was passed by its full name by the user
|
||||
@param languages: list of languages
|
||||
@return: mapped value of the language or raise an exception if the language is not supported
|
||||
"""
|
||||
for language in languages:
|
||||
if language in self._languages.values() or language == 'auto':
|
||||
yield language
|
||||
elif language in self._languages.keys():
|
||||
yield self._languages[language]
|
||||
else:
|
||||
raise LanguageNotSupportedException(language)
|
||||
|
||||
def is_language_supported(self, *languages):
|
||||
"""
|
||||
check if the language is supported by the translator
|
||||
@param languages: list of languages
|
||||
@return: bool or raise an Exception
|
||||
"""
|
||||
for lang in languages:
|
||||
if lang != 'auto' and lang not in self._languages.keys():
|
||||
if lang != 'auto' and lang not in self._languages.values():
|
||||
raise LanguageNotSupportedException(lang)
|
||||
return True
|
||||
|
||||
def translate(self, text, return_all=False, **kwargs):
|
||||
"""
|
||||
function that uses the mymemory translator to translate a text
|
||||
@param text: desired text to translate
|
||||
@type text: str
|
||||
@param return_all: set to True to return all synonym/similars of the translated text
|
||||
@return: str or list
|
||||
"""
|
||||
|
||||
if self._validate_payload(text, max_chars=500):
|
||||
text = text.strip()
|
||||
|
||||
if self.payload_key:
|
||||
self._url_params[self.payload_key] = text
|
||||
if self.email:
|
||||
self._url_params['de'] = self.email
|
||||
|
||||
response = requests.get(self.__base_url,
|
||||
params=self._url_params,
|
||||
headers=self.headers)
|
||||
|
||||
if response.status_code == 429:
|
||||
raise TooManyRequests()
|
||||
if response.status_code != 200:
|
||||
raise RequestError()
|
||||
|
||||
data = response.json()
|
||||
if not data:
|
||||
TranslationNotFound(text)
|
||||
|
||||
translation = data.get('responseData').get('translatedText')
|
||||
if translation:
|
||||
return translation
|
||||
|
||||
elif not translation:
|
||||
all_matches = data.get('matches')
|
||||
matches = (match['translation'] for match in all_matches)
|
||||
next_match = next(matches)
|
||||
return next_match if not return_all else list(all_matches)
|
||||
|
||||
def translate_sentences(self, sentences=None, **kwargs):
    """
    deprecated helper that translates several sentences one after the other
    by calling translate on each of them. Use translate_batch instead.

    @param sentences: list of sentences to translate
    @param kwargs: forwarded unchanged to every translate call
    @return: list of all translated sentences
    @raise NotValidPayload: if sentences is empty or None
    """
    deprecation_note = "deprecated. Use the translate_batch function instead"
    # announce the deprecation both through the warnings machinery and the log
    warnings.warn(deprecation_note, DeprecationWarning, stacklevel=2)
    logging.warning(deprecation_note)

    if not sentences:
        raise NotValidPayload(sentences)

    # any exception raised by translate propagates to the caller unchanged
    return [self.translate(text=item, **kwargs) for item in sentences]
|
||||
|
||||
def translate_file(self, path, **kwargs):
    """
    translate directly from file
    @param path: path to the target file
    @type path: str
    @param kwargs: additional args, forwarded to translate (e.g. return_all)
    @return: whatever translate returns for the file content
    """
    # read the whole file; errors from open/read propagate to the caller as before
    with open(path) as f:
        text = f.read()

    # kwargs used to be silently dropped although they are documented
    return self.translate(text=text, **kwargs)
|
||||
|
||||
def translate_batch(self, batch=None):
    """
    translate a list of texts, one request per text
    @param batch: list of texts you want to translate
    @return: list of translations, in the order of the input
    @raise Exception: if batch is empty or None
    """
    if not batch:
        raise Exception("Enter your text list that you want to translate")

    translations = []
    for item in batch:
        translations.append(self.translate(item))
        # pause for two seconds after every request
        sleep(2)
    return translations
|
@ -0,0 +1,71 @@
|
||||
"""parent translator class"""
|
||||
|
||||
from deep_translator.exceptions import NotValidPayload, NotValidLength
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
|
||||
class BaseTranslator(ABC):
    """
    Abstract class that serves as a parent translator for the other translators
    """

    def __init__(self,
                 base_url=None,
                 source="auto",
                 target="en",
                 payload_key=None,
                 element_tag=None,
                 element_query=None,
                 **url_params):
        """
        @param base_url: url of the translation service
        @param source: source language to translate from
        @param target: target language to translate to
        @param payload_key: name of the request parameter that carries the text
        @param element_tag: html tag stored for subclasses that parse a page
        @param element_query: attribute filter stored alongside element_tag
        @param url_params: extra keyword arguments, kept as the request parameters
        """
        self.__base_url = base_url
        self._source = source
        self._target = target
        self._url_params = url_params
        self._element_tag = element_tag
        self._element_query = element_query
        self.payload_key = payload_key
        self.headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) '
                                      'AppleWebit/535.19'
                                      '(KHTML, like Gecko) Chrome/18.0.1025.168 Safari/535.19'}
        super(BaseTranslator, self).__init__()

    @staticmethod
    def _validate_payload(payload, min_chars=1, max_chars=5000):
        """
        validate the target text to translate
        @param payload: text to translate
        @param min_chars: minimum number of characters allowed (inclusive)
        @param max_chars: maximum number of characters allowed (inclusive)
        @return: True if the payload is valid
        @raise NotValidPayload: if payload is empty or not a string
        @raise NotValidLength: if the payload length is outside [min_chars, max_chars]
        """
        if not payload or not isinstance(payload, str):
            raise NotValidPayload(payload)
        if not BaseTranslator.__check_length(payload, min_chars, max_chars):
            raise NotValidLength(payload, min_chars, max_chars)
        return True

    @staticmethod
    def __check_length(payload, min_chars, max_chars):
        """
        check length of the provided target text to translate
        @param payload: text to translate
        @param min_chars: minimum characters allowed
        @param max_chars: maximum characters allowed
        @return: bool
        """
        # both bounds are inclusive: the former strict comparison rejected a
        # one-character text with the default min_chars=1
        return min_chars <= len(payload) <= max_chars

    @abstractmethod
    def translate(self, text, **kwargs):
        """
        translate a text using a translator under the hood and return the translated text
        @param text: text to translate
        @param kwargs: additional arguments
        @return: str
        @raise NotImplementedError: always, subclasses must provide the implementation
        """
        # NotImplemented is a non-callable constant; the exception is what is meant here
        raise NotImplementedError('You need to implement the translate method!')
|
||||
|
||||
|
||||
|
@ -0,0 +1,136 @@
|
||||
"""
|
||||
pons translator API
|
||||
"""
|
||||
from bs4 import BeautifulSoup
|
||||
import requests
|
||||
from deep_translator.constants import BASE_URLS, PONS_LANGUAGES_TO_CODES, PONS_CODES_TO_LANGUAGES
|
||||
from deep_translator.exceptions import (LanguageNotSupportedException,
|
||||
TranslationNotFound,
|
||||
NotValidPayload,
|
||||
ElementNotFoundInGetRequest,
|
||||
RequestError,
|
||||
TooManyRequests)
|
||||
from deep_translator.parent import BaseTranslator
|
||||
from requests.utils import requote_uri
|
||||
|
||||
|
||||
class PonsTranslator(BaseTranslator):
    """
    class that uses PONS translator to translate words
    """
    _languages = PONS_LANGUAGES_TO_CODES
    supported_languages = list(_languages.keys())

    def __init__(self, source, target="english"):
        """
        @param source: source language to translate from
        @param target: target language to translate to
        @raise LanguageNotSupportedException: if one of the languages is unknown
        """
        self.__base_url = BASE_URLS.get("PONS")

        if self.is_language_supported(source, target):
            self._source, self._target = self._map_language_to_code(source, target)

        super().__init__(base_url=self.__base_url,
                         source=self._source,
                         target=self._target,
                         payload_key=None,
                         element_tag='div',
                         element_query={"class": "target"}
                         )

    @staticmethod
    def get_supported_languages(as_dict=False):
        """
        return the supported languages by the PONS translator
        @param as_dict: if True, the languages will be returned as a dictionary mapping languages to their abbreviations
        @return: list or dict
        """
        return PonsTranslator.supported_languages if not as_dict else PonsTranslator._languages

    def _map_language_to_code(self, *languages, **kwargs):
        """
        normalise the given languages to the form used in the request url.
        An abbreviation is looked up in PONS_CODES_TO_LANGUAGES, a full language
        name is passed through unchanged.
        @param languages: list of languages
        @return: generator over the mapped values
        @raise LanguageNotSupportedException: if a language is neither a known name nor a known code
        """
        for language in languages:
            if language in self._languages.values():
                yield PONS_CODES_TO_LANGUAGES[language]
            elif language in self._languages.keys():
                yield language
            else:
                raise LanguageNotSupportedException(language)

    def is_language_supported(self, *languages, **kwargs):
        """
        check if the language is supported by the translator
        @param languages: list of languages
        @return: True if every language is supported
        @raise LanguageNotSupportedException: for the first unsupported language
        """
        for lang in languages:
            if lang not in self._languages.keys() and lang not in self._languages.values():
                raise LanguageNotSupportedException(lang)
        return True

    def translate(self, word, return_all=False, **kwargs):
        """
        function that uses PONS to translate a word
        @param word: word to translate
        @type word: str
        @param return_all: set to True to return all synonym of the translated word
        @type return_all: bool
        @return: str: translated word, or list of all candidates when return_all is True
        @raise TooManyRequests: the site answered with HTTP 429
        @raise RequestError: the site answered with any other non-200 status
        @raise ElementNotFoundInGetRequest: no result element was found in the page
        @raise TranslationNotFound: the result elements contained no usable text
        """
        # the validator is expected to raise on bad input; a falsy result keeps
        # the historical behaviour of returning None
        if not self._validate_payload(word, max_chars=50):
            return None

        url = "{}{}-{}/{}".format(self.__base_url, self._source, self._target, word)
        url = requote_uri(url)
        response = requests.get(url)

        if response.status_code == 429:
            raise TooManyRequests()

        if response.status_code != 200:
            raise RequestError()

        soup = BeautifulSoup(response.text, 'html.parser')
        elements = soup.findAll(self._element_tag, self._element_query)

        if not elements:
            raise ElementNotFoundInGetRequest(word)

        # only links pointing back to the reverse translation are part of a result
        reverse_link = "/translate/{}-{}/".format(self._target, self._source)
        filtered_elements = []
        for el in elements:
            temp = ''
            for anchor in el.findAll('a'):
                if anchor.parent.name != 'div':
                    continue
                # an anchor without href used to crash on "in None"
                href = anchor.get('href') or ''
                if reverse_link in href:
                    temp += anchor.get_text() + ' '
            filtered_elements.append(temp)

        if not filtered_elements:
            raise ElementNotFoundInGetRequest(word)

        word_list = [candidate for candidate in filtered_elements
                     if candidate and len(candidate) > 1]

        if not word_list:
            raise TranslationNotFound(word)

        return word_list if return_all else word_list[0]

    def translate_words(self, words, **kwargs):
        """
        translate a batch of words together by providing them in a list
        @param words: list of words you want to translate
        @param kwargs: additional args, forwarded to translate
        @return: list of translated words
        @raise NotValidPayload: if words is empty or None
        """
        if not words:
            raise NotValidPayload(words)

        # translate takes the text as "word"; the former "payload=" keyword
        # made every call fail with a TypeError
        return [self.translate(word=word, **kwargs) for word in words]
|
||||
|
@ -0,0 +1,91 @@
|
||||
|
||||
import requests
|
||||
from requests.utils import requote_uri
|
||||
from deep_translator.constants import BASE_URLS
|
||||
from deep_translator.exceptions import (RequestError,
|
||||
ServerException, TranslationNotFound, TooManyRequests)
|
||||
|
||||
|
||||
class QCRI(object):
    """
    class that wraps functions, which use the QCRI translator under the hood to translate word(s)
    """

    def __init__(self, api_key=None):
        """
        @param api_key: your qcri api key. Get one for free here https://mt.qcri.org/api/v1/ref
        @raise ServerException: with code 401 if no api key is given
        """
        if not api_key:
            raise ServerException(401)
        self.__base_url = BASE_URLS.get("QCRI")

        self.api_key = api_key
        self.api_endpoints = {
            "get_languages": "getLanguagePairs",
            "get_domains": "getDomains",
            "translate": "translate",
        }

        self.params = {
            "key": self.api_key
        }

    def _get(self, endpoint, params=None, return_text=True):
        """
        send a GET request to one of the api endpoints
        @param endpoint: key of api_endpoints to call
        @param params: query parameters, defaults to the api key only
        @param return_text: if True return the response body as text, else the response object
        @return: str or the response object
        """
        if not params:
            params = self.params
        res = requests.get(self.__base_url.format(endpoint=self.api_endpoints[endpoint]), params=params)
        return res.text if return_text else res

    def get_supported_languages(self):
        """
        @return: text body of the language pairs endpoint
        """
        pairs = self._get("get_languages")
        return pairs

    @property
    def languages(self):
        return self.get_supported_languages()

    def get_domains(self):
        """
        @return: text body of the domains endpoint
        """
        domains = self._get("get_domains")
        return domains

    @property
    def domains(self):
        return self.get_domains()

    def translate(self, source, target, domain, text):
        """
        translate a text
        @param source: source language
        @param target: target language
        @param domain: translation domain sent to the api
        @param text: text to translate
        @return: translated text
        @raise ServerException: on connection failure (503) or a non-200 answer
        @raise TranslationNotFound: if the answer holds no translated text
        """
        params = {
            "key": self.api_key,
            "langpair": "{}-{}".format(source, target),
            "domain": domain,
            "text": text
        }
        try:
            response = self._get("translate", params=params, return_text=False)
        # requests raises its own ConnectionError, which is not a subclass of
        # the builtin one, so both are listed
        except (ConnectionError, requests.exceptions.ConnectionError):
            raise ServerException(503)

        if response.status_code != 200:
            # the exception used to be created without being raised, which
            # made the method silently return None
            raise ServerException(response.status_code)

        translation = response.json().get("translatedText")
        if not translation:
            raise TranslationNotFound(text)
        return translation

    def translate_batch(self, source, target, domain, batch):
        """
        translate a batch of texts
        @param source: source language
        @param target: target language
        @param domain: translation domain sent to the api
        @param batch: list of texts to translate
        @return: list of translations
        """
        return [self.translate(source, target, domain, text) for text in batch]
|
||||
|
@ -0,0 +1 @@
|
||||
"""Unit test package for deep_translator."""
|
@ -0,0 +1,57 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
"""Tests for `deep_translator` package."""
|
||||
|
||||
import pytest
|
||||
from deep_translator import exceptions, GoogleTranslator
|
||||
|
||||
|
||||
@pytest.fixture
def google_translator():
    """Provide a GoogleTranslator whose target language is English."""
    translator = GoogleTranslator(target='en')
    return translator
|
||||
|
||||
|
||||
def test_content(google_translator):
    """The Korean sample word must be translated to "good"."""
    translated = google_translator.translate(text='좋은')
    assert translated == "good"
|
||||
|
||||
|
||||
def test_inputs():
    """Unsupported languages are rejected; codes and full names are equivalent."""
    for source, target in (("", ""), ("auto", "nothing")):
        with pytest.raises(exceptions.LanguageNotSupportedException):
            GoogleTranslator(source=source, target=target)

    # test abbreviations and languages
    by_code = GoogleTranslator("en", "fr")
    by_name = GoogleTranslator("english", "french")
    assert by_code._source == by_name._source
    assert by_code._target == by_name._target
|
||||
|
||||
|
||||
def test_payload(google_translator):
    """Empty or non-string payloads and over-long texts must be rejected."""
    for invalid in ("", 123, {}, []):
        with pytest.raises(exceptions.NotValidPayload):
            google_translator.translate(text=invalid)

    too_long = "a"*5001
    with pytest.raises(exceptions.NotValidLength):
        google_translator.translate(too_long)
|
||||
|
||||
#for _ in range(1):
|
||||
#assert google_translator.translate(text='좋은') == "good"
|
@ -0,0 +1,49 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
"""Tests for `deep_translator` package."""
|
||||
|
||||
import pytest
|
||||
from deep_translator import exceptions, LingueeTranslator
|
||||
|
||||
|
||||
@pytest.fixture
def linguee():
    """Provide a LingueeTranslator from English to French."""
    translator = LingueeTranslator(source="english", target='french')
    return translator
|
||||
|
||||
|
||||
def test_content(linguee):
    """Translating a common word must yield some result."""
    translated = linguee.translate(word='good')
    assert translated is not None
|
||||
|
||||
|
||||
def test_inputs():
    """Unsupported languages are rejected; codes and full names are equivalent."""
    for source, target in (("", ""), ("auto", "nothing")):
        with pytest.raises(exceptions.LanguageNotSupportedException):
            LingueeTranslator(source=source, target=target)

    by_code = LingueeTranslator("en", "fr")
    by_name = LingueeTranslator("english", "french")
    assert by_code._source == by_name._source
    assert by_code._target == by_name._target
|
||||
|
||||
|
||||
def test_payload(linguee):
    """Empty or non-string payloads and over-long words must be rejected."""
    for invalid in ("", 123, {}, []):
        with pytest.raises(exceptions.NotValidPayload):
            linguee.translate(invalid)

    too_long = "a"*51
    with pytest.raises(exceptions.NotValidLength):
        linguee.translate(too_long)
|
@ -0,0 +1,48 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
"""Tests for `deep_translator` package."""
|
||||
|
||||
import pytest
|
||||
from deep_translator import exceptions, MyMemoryTranslator
|
||||
|
||||
|
||||
@pytest.fixture
def mymemory():
    """Provide a MyMemoryTranslator from English to French."""
    translator = MyMemoryTranslator(source="en", target='fr')
    return translator
|
||||
|
||||
|
||||
def test_content(mymemory):
    """Translating a common word must yield some result."""
    translated = mymemory.translate(text='good')
    assert translated is not None
|
||||
|
||||
|
||||
def test_inputs():
    """Unsupported languages are rejected; codes and full names are equivalent."""
    for source, target in (("", ""), ("auto", "nothing")):
        with pytest.raises(exceptions.LanguageNotSupportedException):
            MyMemoryTranslator(source=source, target=target)

    by_code = MyMemoryTranslator("en", "fr")
    by_name = MyMemoryTranslator("english", "french")
    assert by_code._source == by_name._source
    assert by_code._target == by_name._target
|
||||
|
||||
|
||||
def test_payload(mymemory):
    """Empty or non-string payloads and over-long texts must be rejected."""
    for invalid in ("", 123, {}, []):
        with pytest.raises(exceptions.NotValidPayload):
            mymemory.translate(text=invalid)

    too_long = "a"*501
    with pytest.raises(exceptions.NotValidLength):
        mymemory.translate(text=too_long)
|
@ -0,0 +1,48 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
"""Tests for `deep_translator` package."""
|
||||
|
||||
import pytest
|
||||
from deep_translator import exceptions, PonsTranslator
|
||||
|
||||
|
||||
@pytest.fixture
def pons():
    """Provide a PonsTranslator from English to French."""
    translator = PonsTranslator(source="english", target='french')
    return translator
|
||||
|
||||
|
||||
def test_content(pons):
    """Translating a common word must yield some result."""
    translated = pons.translate(word='good')
    assert translated is not None
|
||||
|
||||
|
||||
def test_inputs():
    """Unsupported languages are rejected; codes and full names are equivalent."""
    for source, target in (("", ""), ("auto", "nothing")):
        with pytest.raises(exceptions.LanguageNotSupportedException):
            PonsTranslator(source=source, target=target)

    by_code = PonsTranslator("en", "fr")
    by_name = PonsTranslator("english", "french")
    assert by_code._source == by_name._source
    assert by_code._target == by_name._target
|
||||
|
||||
|
||||
def test_payload(pons):
    """Empty or non-string payloads and over-long words must be rejected."""
    for invalid in ("", 123, {}, []):
        with pytest.raises(exceptions.NotValidPayload):
            pons.translate(invalid)

    too_long = "a" * 51
    with pytest.raises(exceptions.NotValidLength):
        pons.translate(too_long)
|
@ -0,0 +1,3 @@
|
||||
"""
|
||||
utilities
|
||||
"""
|
@ -0,0 +1,132 @@
|
||||
"""
|
||||
Yandex translator API
|
||||
"""
|
||||
import requests
|
||||
from requests import exceptions
|
||||
from deep_translator.constants import BASE_URLS
|
||||
from deep_translator.exceptions import (RequestError,
|
||||
ServerException, TranslationNotFound, TooManyRequests)
|
||||
|
||||
|
||||
class YandexTranslator(object):
    """
    class that wraps functions, which use the yandex translator under the hood to translate word(s)
    """

    def __init__(self, api_key=None):
        """
        @param api_key: your yandex api key
        @raise ServerException: with code 401 if no api key is given
        """
        if not api_key:
            raise ServerException(401)
        self.__base_url = BASE_URLS.get("YANDEX")

        self.api_key = api_key
        self.api_version = "v1.5"
        self.api_endpoints = {
            "langs": "getLangs",
            "detect": "detect",
            "translate": "translate",
        }

    def get_supported_languages(self):
        """
        @return: set of the source languages found in the translation directions
        """
        return set(x.split("-")[0] for x in self.dirs)

    @property
    def languages(self):
        return self.get_supported_languages()

    @property
    def dirs(self, proxies=None):
        """
        translation directions ("source-target" strings) reported by the api.
        proxies cannot be passed through property access and therefore stays None.
        @raise ServerException: on connection failure (503) or a non-200 answer
        """
        try:
            url = self.__base_url.format(version=self.api_version, endpoint="getLangs")
            response = requests.get(url, params={"key": self.api_key}, proxies=proxies)
        except requests.exceptions.ConnectionError:
            raise ServerException(503)

        # check the status before decoding so an error page without a json
        # body is reported as a ServerException
        if response.status_code != 200:
            raise ServerException(response.status_code)
        return response.json().get("dirs")

    def detect(self, text, proxies=None):
        """
        detect the language of a text
        @param text: text whose language should be detected
        @param proxies: optional proxies passed to requests
        @return: detected language
        @raise ServerException: on connection failure (503), an undecodable
            answer (http status) or an answer without language (501)
        @raise RequestError: if the api reports a code other than 200
        """
        params = {
            "text": text,
            "format": "plain",
            "key": self.api_key,
        }
        try:
            url = self.__base_url.format(version=self.api_version, endpoint="detect")
            response = requests.post(url, data=params, proxies=proxies)
        # requests raises its own ConnectionError, which is not a subclass of
        # the builtin one, so both are listed
        except (ConnectionError, requests.exceptions.ConnectionError):
            raise ServerException(503)

        try:
            data = response.json()
        except ValueError:
            # body is not valid json
            raise ServerException(response.status_code)

        # look at the code first: the language was previously read before the
        # code was checked, which failed with a KeyError when it was absent
        if data.get('code') != 200:
            raise RequestError()

        language = data.get('lang')
        if not language:
            raise ServerException(501)
        return language

    def translate(self, source, target, text, proxies=None):
        """
        translate a text
        @param source: source language, "auto" lets the api pick it
        @param target: target language
        @param text: text to translate
        @param proxies: optional proxies passed to requests
        @return: the "text" entry of the api answer
        @raise ServerException: on connection failure (503) or a code other than 200
        @raise TooManyRequests: if the api reports code 429
        @raise TranslationNotFound: if the answer holds no text
        """
        params = {
            "text": text,
            "format": "plain",
            "lang": target if source == "auto" else "{}-{}".format(source, target),
            "key": self.api_key
        }
        try:
            url = self.__base_url.format(version=self.api_version, endpoint="translate")
            response = requests.post(url, data=params, proxies=proxies)
        except (ConnectionError, requests.exceptions.ConnectionError):
            raise ServerException(503)

        data = response.json()

        if data['code'] == 429:
            raise TooManyRequests()

        if data['code'] != 200:
            raise ServerException(data['code'])

        if not data.get('text'):
            # pass the text, as every other translator does
            raise TranslationNotFound(text)

        return data['text']

    def translate_file(self, source, target, path):
        """
        translate from a file
        @param source: source language
        @param target: target language
        @param path: path to file
        @return: translated text
        """
        with open(path) as f:
            text = f.read()

        return self.translate(source, target, text)

    def translate_batch(self, source, target, batch):
        """
        translate a batch of texts
        @param source: source language
        @param target: target language
        @param batch: list of texts to translate
        @return: list of translations
        """
        return [self.translate(source, target, text) for text in batch]
|
Loading…
Reference in new issue