parent
6e3f4bf804
commit
49c6e8b3fb
@ -0,0 +1,25 @@
|
|||||||
|
"""Top-level package for deep_translator."""
|
||||||
|
|
||||||
|
from .google_trans import GoogleTranslator
|
||||||
|
from .pons import PonsTranslator
|
||||||
|
from .linguee import LingueeTranslator
|
||||||
|
from .mymemory import MyMemoryTranslator
|
||||||
|
from .yandex import YandexTranslator
|
||||||
|
from .qcri import QCRI
|
||||||
|
from .deepl import DeepL
|
||||||
|
from .detection import single_detection, batch_detection
|
||||||
|
|
||||||
|
|
||||||
|
# Package metadata.
__author__ = """Nidhal Baccouri"""
__email__ = 'nidhalbacc@gmail.com'
__version__ = '1.3.2'

# Public API of the package.
# The entries have to be the *names* of the exported objects (strings), not the
# objects themselves: a star-import raises TypeError when __all__ contains
# anything that is not a str.
__all__ = [
    "GoogleTranslator",
    "PonsTranslator",
    "LingueeTranslator",
    "MyMemoryTranslator",
    "YandexTranslator",
    "QCRI",
    "DeepL",
    "single_detection",
    "batch_detection",
]
|
@ -0,0 +1,52 @@
|
|||||||
|
"""Console script for deep_translator."""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import sys
|
||||||
|
from .google_trans import GoogleTranslator
|
||||||
|
from .mymemory import MyMemoryTranslator
|
||||||
|
from .pons import PonsTranslator
|
||||||
|
from .linguee import LingueeTranslator
|
||||||
|
|
||||||
|
|
||||||
|
def translate(args):
    """
    function used to provide translations from the parsed terminal arguments

    The attributes ``translator``, ``source``, ``target`` and ``text`` of args are read.
    When the translator name is unknown, a message is printed and nothing is translated.

    @param args: parsed terminal arguments
    @return: None
    """
    if args.translator == 'google':
        translator = GoogleTranslator(source=args.source, target=args.target)
    elif args.translator == 'pons':
        translator = PonsTranslator(source=args.source, target=args.target)
    elif args.translator == 'linguee':
        translator = LingueeTranslator(source=args.source, target=args.target)
    elif args.translator == 'mymemory':
        translator = MyMemoryTranslator(source=args.source, target=args.target)
    else:
        print("given translator is not supported. Please use a supported translator from the deep_translator tool")
        # there is no translator object to work with: stop here instead of
        # failing with an AttributeError on None further down
        return

    res = translator.translate(args.text)
    print(" | Translation from {} to {} |".format(args.source, args.target))
    print("Translated text: \n {}".format(res))
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """
    entry point of the console script: declares the command line options, parses the
    terminal arguments and hands the parsed result over to translate
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('--translator', '-trans',
                     type=str, default='google', help="name of the translator you want to use")

    # the remaining options are all mandatory strings and only differ in flags and help text
    mandatory_options = (
        ('--source', '-src', "source language to translate from"),
        ('--target', '-tg', "target language to translate to"),
        ('--text', '-txt', "text you want to translate"),
    )
    for long_flag, short_flag, description in mandatory_options:
        cli.add_argument(long_flag, short_flag, type=str, help=description, required=True)

    translate(cli.parse_args())


if __name__ == "__main__":
    main()
|
@ -0,0 +1,11 @@
|
|||||||
|
"""
|
||||||
|
configuration object that holds data about the language detection api
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Settings of the language detection web service.
config = {
    # address the detection requests are sent to
    "url": "https://ws.detectlanguage.com/0.2/detect",
    # http headers sent along with every request
    "headers": {
        "User-Agent": "Detect Language API Python Client 1.4.0",
        # template: "{}" is the slot for the API key
        "Authorization": "Bearer {}",
    },
}
|
@ -0,0 +1,183 @@
|
|||||||
|
|
||||||
|
|
||||||
|
# Base address of every supported translation service, keyed by service name.
# Entries containing "{version}" / "{endpoint}" are templates that are meant to
# be completed with str.format before use.
BASE_URLS = {
    'GOOGLE_TRANSLATE': 'https://translate.google.com/m',
    'PONS': 'https://en.pons.com/translate/',
    'YANDEX': 'https://translate.yandex.net/api/{version}/tr.json/{endpoint}',
    'LINGUEE': 'https://www.linguee.com/',
    'MYMEMORY': 'http://api.mymemory.translated.net/get',
    'QCRI': 'https://mt.qcri.org/api/v1/{endpoint}?',
    'DEEPL': 'https://api.deepl.com/{version}/',
}
|
||||||
|
|
||||||
|
# Language abbreviation -> language name, as used by the google translator.
# The insertion order is kept as-is because the inverse mapping below (and any
# list built from it) inherits it.
GOOGLE_CODES_TO_LANGUAGES = {
    "af": "afrikaans",
    "sq": "albanian",
    "am": "amharic",
    "ar": "arabic",
    "hy": "armenian",
    "az": "azerbaijani",
    "eu": "basque",
    "be": "belarusian",
    "bn": "bengali",
    "bs": "bosnian",
    "bg": "bulgarian",
    "ca": "catalan",
    "ceb": "cebuano",
    "ny": "chichewa",
    "zh-cn": "chinese (simplified)",
    "zh-tw": "chinese (traditional)",
    "co": "corsican",
    "hr": "croatian",
    "cs": "czech",
    "da": "danish",
    "nl": "dutch",
    "en": "english",
    "eo": "esperanto",
    "et": "estonian",
    "tl": "filipino",
    "fi": "finnish",
    "fr": "french",
    "fy": "frisian",
    "gl": "galician",
    "ka": "georgian",
    "de": "german",
    "el": "greek",
    "gu": "gujarati",
    "ht": "haitian creole",
    "ha": "hausa",
    "haw": "hawaiian",
    "iw": "hebrew",
    "hi": "hindi",
    "hmn": "hmong",
    "hu": "hungarian",
    "is": "icelandic",
    "ig": "igbo",
    "id": "indonesian",
    "ga": "irish",
    "it": "italian",
    "ja": "japanese",
    "jw": "javanese",
    "kn": "kannada",
    "kk": "kazakh",
    "km": "khmer",
    "ko": "korean",
    "ku": "kurdish (kurmanji)",
    "ky": "kyrgyz",
    "lo": "lao",
    "la": "latin",
    "lv": "latvian",
    "lt": "lithuanian",
    "lb": "luxembourgish",
    "mk": "macedonian",
    "mg": "malagasy",
    "ms": "malay",
    "ml": "malayalam",
    "mt": "maltese",
    "mi": "maori",
    "mr": "marathi",
    "mn": "mongolian",
    "my": "myanmar (burmese)",
    "ne": "nepali",
    "no": "norwegian",
    "ps": "pashto",
    "fa": "persian",
    "pl": "polish",
    "pt": "portuguese",
    "pa": "punjabi",
    "ro": "romanian",
    "ru": "russian",
    "sm": "samoan",
    "gd": "scots gaelic",
    "sr": "serbian",
    "st": "sesotho",
    "sn": "shona",
    "sd": "sindhi",
    "si": "sinhala",
    "sk": "slovak",
    "sl": "slovenian",
    "so": "somali",
    "es": "spanish",
    "su": "sundanese",
    "sw": "swahili",
    "sv": "swedish",
    "tg": "tajik",
    "ta": "tamil",
    "te": "telugu",
    "th": "thai",
    "tr": "turkish",
    "uk": "ukrainian",
    "ur": "urdu",
    "uz": "uzbek",
    "vi": "vietnamese",
    "cy": "welsh",
    "xh": "xhosa",
    "yi": "yiddish",
    "yo": "yoruba",
    "zu": "zulu",
    # note the capitalised names of the two entries below; they are distinct
    # from the lowercase "filipino" / "hebrew" entries above
    "fil": "Filipino",
    "he": "Hebrew",
}

# Inverse view: language name -> language abbreviation.
GOOGLE_LANGUAGES_TO_CODES = {name: code for code, name in GOOGLE_CODES_TO_LANGUAGES.items()}
|
||||||
|
|
||||||
|
# Language abbreviation -> language name, as used by the pons translator.
PONS_CODES_TO_LANGUAGES = {
    "ar": "arabic",
    "bg": "bulgarian",
    "zh-cn": "chinese",
    "cs": "czech",
    "da": "danish",
    "nl": "dutch",
    "en": "english",
    "fr": "french",
    "de": "german",
    "el": "greek",
    "hu": "hungarian",
    "it": "italian",
    "la": "latin",
    "no": "norwegian",
    "pl": "polish",
    "pt": "portuguese",
    "ru": "russian",
    "sl": "slovenian",
    "es": "spanish",
    "sv": "swedish",
    "tr": "turkish",
    "elv": "elvish",
}

# Inverse view: language name -> language abbreviation.
PONS_LANGUAGES_TO_CODES = {name: code for code, name in PONS_CODES_TO_LANGUAGES.items()}
|
||||||
|
|
||||||
|
# Language name -> language abbreviation, as used by the linguee translator.
# Two entries are switched off on purpose and kept as comments.
LINGUEE_LANGUAGES_TO_CODES = {
    'maltese': 'mt',
    'english': 'en',
    'german': 'de',
    'bulgarian': 'bg',
    'polish': 'pl',
    'portuguese': 'pt',
    'hungarian': 'hu',
    'romanian': 'ro',
    'russian': 'ru',
    # 'serbian': 'sr',
    'dutch': 'nl',
    'slovakian': 'sk',
    'greek': 'el',
    'slovenian': 'sl',
    'danish': 'da',
    'italian': 'it',
    'spanish': 'es',
    'finnish': 'fi',
    'chinese': 'zh',
    'french': 'fr',
    # 'croatian': 'hr',
    'czech': 'cs',
    'laotian': 'lo',
    'swedish': 'sv',
    'latvian': 'lv',
    'estonian': 'et',
    'japanese': 'ja',
}

# Inverse view: language abbreviation -> language name.
LINGUEE_CODE_TO_LANGUAGE = {code: name for name, code in LINGUEE_LANGUAGES_TO_CODES.items()}
|
||||||
|
|
||||||
|
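# NOTE(review): the value below looks like a hard-coded API key or token. If it is one, remove it from the source and rotate it.
# "72e9e2cc7c992db4dcbdd6fb9f91a0d1"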
|
@ -0,0 +1,59 @@
|
|||||||
|
|
||||||
|
import requests
|
||||||
|
from requests.utils import requote_uri
|
||||||
|
from deep_translator.constants import BASE_URLS
|
||||||
|
from deep_translator.exceptions import (RequestError,
|
||||||
|
ServerException, TranslationNotFound, TooManyRequests)
|
||||||
|
|
||||||
|
|
||||||
|
class DeepL(object):
    """
    class that wraps functions, which use the DeepL translator under the hood to translate word(s)
    """

    def __init__(self, api_key=None):
        """
        @param api_key: your DeepL api key. Get one here: https://www.deepl.com/docs-api/accessing-the-api/
        @raise ServerException: (status 401) if no api key is given
        """
        if not api_key:
            raise ServerException(401)
        self.version = 'v2'
        self.api_key = api_key
        self.__base_url = BASE_URLS.get("DEEPL").format(version=self.version)

    def translate(self, source, target, text):
        """
        translate a text with the DeepL api

        @param source: source language, sent as the source_lang parameter
        @param target: target language, sent as the target_lang parameter
        @param text: text to translate
        @return: the decoded json body of the response
        @raise ServerException: if the server cannot be reached (status 503) or answers with a status other than 200
        @raise TranslationNotFound: if the response body is empty
        """
        params = {
            "auth_key": self.api_key,
            "target_lang": target,
            "source_lang": source,
            "text": text
        }
        try:
            # the base url only ends with the api version; translations are
            # served by the "translate" endpoint below it
            response = requests.get(self.__base_url + 'translate', params=params)
        except requests.exceptions.ConnectionError as err:
            # requests raises its own ConnectionError class, which is not a
            # subclass of the builtin ConnectionError
            raise ServerException(503) from err

        if response.status_code != 200:
            # the exception has to be raised, not merely instantiated
            raise ServerException(response.status_code)

        res = response.json()
        if not res:
            raise TranslationNotFound(text)
        return res

    def translate_batch(self, source, target, batch):
        """
        translate a batch of texts
        @param source: source language
        @param target: target language
        @param batch: list of texts to translate
        @return: list of translations
        """
        return [self.translate(source, target, text) for text in batch]


if __name__ == '__main__':
    d = DeepL(api_key="key")
    print(d)
|
@ -0,0 +1,76 @@
|
|||||||
|
"""
|
||||||
|
language detection API
|
||||||
|
"""
|
||||||
|
import requests
|
||||||
|
from deep_translator.configs import config
|
||||||
|
from requests.exceptions import HTTPError
|
||||||
|
|
||||||
|
|
||||||
|
def get_request_body(text, api_key, *args):
    """
    send a request and return the response body parsed as dictionary

    @param text: target text that you want to detect its language
    @type text: str
    @type api_key: str
    @param api_key: your private API key
    @param args: accepted but not used
    @return: the value stored under the 'data' key of the json response
    @raise Exception: if the api key or the text is missing
    @raise HTTPError: if the server answers with an error status
    """
    if not api_key:
        raise Exception("you need to get an API_KEY for this to work. "
                        "Get one for free here: https://detectlanguage.com/documentation")
    if not text:
        raise Exception("Please provide an input text")

    # work on a copy: formatting the shared template in place would store the
    # first key in the module level config and silently reuse it for every
    # later call, whatever key is passed
    headers = dict(config['headers'])
    headers['Authorization'] = headers['Authorization'].format(api_key)

    try:
        response = requests.post(config['url'],
                                 json={'q': text},
                                 headers=headers)
        # turn error statuses into HTTPError so that the handler below is reachable
        response.raise_for_status()
        body = response.json().get('data')
        return body

    except HTTPError as e:
        print("Error occured while requesting from server: ", e.args)
        raise e
|
||||||
|
|
||||||
|
|
||||||
|
def single_detection(text, api_key=None, detailed=False, *args, **kwargs):
    """
    detect the language of a single text

    Only the first entry of the 'detections' list returned by the service is used.

    @param text: target text that you want to detect its language
    @type text: str
    @param api_key: your private API key
    @type api_key: str
    @param detailed: set to True if you want to get detailed information about the detection process
    @return: the first detection entry if detailed is set, otherwise its 'language' value
             (None when that value is missing or empty)
    """
    first_detection = get_request_body(text, api_key).get('detections')[0]
    if detailed:
        return first_detection

    language = first_detection.get('language', None)
    return language if language else None
|
||||||
|
|
||||||
|
|
||||||
|
def batch_detection(text_list, api_key, detailed=False, *args):
    """
    detect the language of every text of a batch

    For each item of the 'detections' list returned by the service only its first entry is kept.

    @param text_list: target batch that you want to detect its language
    @param api_key: your private API key
    @param detailed: set to True if you want to get detailed information about the detection process
    @return: list of the kept detection entries if detailed is set, otherwise list of their 'language' values
    """
    detections = get_request_body(text_list, api_key).get('detections')
    first_entries = [candidates[0] for candidates in detections]
    if not detailed:
        return [entry['language'] for entry in first_entries]
    return first_entries
|
||||||
|
|
@ -0,0 +1,113 @@
|
|||||||
|
class BaseError(Exception):
    """
    base error structure class
    """

    def __init__(self, val, message):
        """
        @param val: actual value
        @param message: message shown to the user
        """
        self.val = val
        self.message = message
        # hand both values to Exception so that args is populated; with empty
        # args the exception cannot be rebuilt by copy / pickle
        super().__init__(val, message)

    def __str__(self):
        """
        @return: the value and the message joined by an arrow
        """
        return "{} --> {}".format(self.val, self.message)
|
||||||
|
|
||||||
|
|
||||||
|
class LanguageNotSupportedException(BaseError):
    """
    error raised when a language is requested that deep_translator does not support
    """

    def __init__(self, val, message="There is no support for the chosen language"):
        """
        @param val: the language that was asked for
        @param message: message shown to the user
        """
        super(LanguageNotSupportedException, self).__init__(val, message)
|
||||||
|
|
||||||
|
|
||||||
|
class NotValidPayload(BaseError):
    """
    error raised when the payload entered by the user is not valid
    """

    def __init__(self,
                 val,
                 message='text must be a valid text with maximum 5000 character, otherwise it cannot be translated'):
        """
        @param val: the rejected payload
        @param message: message shown to the user
        """
        super().__init__(val, message)
|
||||||
|
|
||||||
|
|
||||||
|
class TranslationNotFound(BaseError):
    """
    error raised when no translation could be found for the text given by the user
    """

    def __init__(self,
                 val,
                 message='No translation was found using the current translator. Try another translator?'):
        """
        @param val: the text that could not be translated
        @param message: message shown to the user
        """
        super().__init__(val, message)
|
||||||
|
|
||||||
|
|
||||||
|
class ElementNotFoundInGetRequest(BaseError):
    """
    error raised when the html element that is looked for is missing from the body parsed by beautifulsoup
    """

    def __init__(self,
                 val,
                 message='Required element was not found in the API response'):
        """
        @param val: actual value
        @param message: message shown to the user
        """
        super().__init__(val, message)
|
||||||
|
|
||||||
|
|
||||||
|
class NotValidLength(BaseError):
    """
    error raised when the length of the given text is outside the limits of the translator
    """

    def __init__(self, val, min_chars, max_chars):
        """
        @param val: the rejected text
        @param min_chars: lower length limit named in the message
        @param max_chars: upper length limit named in the message
        """
        super().__init__(
            val,
            "Text length need to be between {} and {} characters".format(min_chars, max_chars),
        )
|
||||||
|
|
||||||
|
|
||||||
|
class RequestError(Exception):
    """
    exception thrown if an error occurred during the request call, e.g a connection problem.
    """

    def __init__(self, message="Request exception can happen due to an api connection error. "
                               "Please check your connection and try again"):
        """
        @param message: message shown to the user
        """
        self.message = message
        # initialise the Exception base as well, so that args carries the
        # message even when the default is used
        super().__init__(message)

    def __str__(self):
        """
        @return: the message of the error
        """
        return self.message
|
||||||
|
|
||||||
|
|
||||||
|
class TooManyRequests(Exception):
    """
    exception thrown if too many requests were sent to the server
    """

    def __init__(self, message="Server Error: You made too many requests to the server. "
                               "According to google, you are allowed to make 5 requests per second "
                               "and up to 200k requests per day. "
                               "You can wait and try again later or you can try the translate_batch function"):
        """
        @param message: message shown to the user
        """
        self.message = message
        # initialise the Exception base as well, so that args carries the
        # message even when the default is used
        super().__init__(message)

    def __str__(self):
        """
        @return: the message of the error
        """
        return self.message
|
||||||
|
|
||||||
|
|
||||||
|
class ServerException(Exception):
    """
    error raised for a failed API call; the status code is translated into an error name
    """
    # status code -> error name; codes that are not listed fall back to a generic text
    errors = {
        401: "ERR_KEY_INVALID",
        402: "ERR_KEY_BLOCKED",
        403: "ERR_DAILY_REQ_LIMIT_EXCEEDED",
        404: "ERR_DAILY_CHAR_LIMIT_EXCEEDED",
        413: "ERR_TEXT_TOO_LONG",
        422: "ERR_UNPROCESSABLE_TEXT",
        501: "ERR_LANG_NOT_SUPPORTED",
        503: "ERR_SERVICE_NOT_AVAIBLE",
    }

    def __init__(self, status_code, *args):
        """
        @param status_code: status code used to look up the error name
        @param args: additional values passed on to Exception after the error name
        """
        super().__init__(self.errors.get(status_code, "API server error"), *args)
|
@ -0,0 +1,173 @@
|
|||||||
|
"""
|
||||||
|
google translator API
|
||||||
|
"""
|
||||||
|
|
||||||
|
from deep_translator.constants import BASE_URLS, GOOGLE_LANGUAGES_TO_CODES
|
||||||
|
from deep_translator.exceptions import TooManyRequests, LanguageNotSupportedException, TranslationNotFound, NotValidPayload, RequestError
|
||||||
|
from deep_translator.parent import BaseTranslator
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import requests
|
||||||
|
from time import sleep
|
||||||
|
import warnings
|
||||||
|
import logging
|
||||||
|
|
||||||
|
|
||||||
|
class GoogleTranslator(BaseTranslator):
    """
    translator that requests a google translate page and reads the translated text out of the returned html
    """
    _languages = GOOGLE_LANGUAGES_TO_CODES
    supported_languages = list(_languages.keys())

    def __init__(self, source="auto", target="en"):
        """
        @param source: language to translate from, as full name or abbreviation ('auto' is accepted as well)
        @param target: language to translate to, as full name or abbreviation
        """
        self.__base_url = BASE_URLS.get("GOOGLE_TRANSLATE")

        if self.is_language_supported(source, target):
            codes = self._map_language_to_code(source.lower(), target.lower())
            self._source, self._target = codes

        settings = {
            'base_url': self.__base_url,
            'source': self._source,
            'target': self._target,
            'element_tag': 'div',
            'element_query': {"class": "t0"},
            'payload_key': 'q',  # key of text in the url
            'hl': self._target,
            'sl': self._source,
        }
        super().__init__(**settings)

        # second query, tried when the first one finds nothing in the page
        self._alt_element_query = {"class": "result-container"}

    @staticmethod
    def get_supported_languages(as_dict=False):
        """
        give access to the languages known to the google translator
        @param as_dict: if True, the dictionary mapping languages to their abbreviations is returned
        @return: list or dict
        """
        if as_dict:
            return GoogleTranslator._languages
        return GoogleTranslator.supported_languages

    def _map_language_to_code(self, *languages):
        """
        generator translating each given language into its abbreviation; abbreviations and 'auto' pass through unchanged
        @param languages: list of languages
        @return: mapped value of the language or raise an exception if the language is not supported
        """
        for language in languages:
            if language == 'auto' or language in self._languages.values():
                yield language
            elif language in self._languages.keys():
                yield self._languages[language]
            else:
                raise LanguageNotSupportedException(language)

    def is_language_supported(self, *languages):
        """
        make sure every given language is 'auto', a known language name or a known abbreviation
        @param languages: list of languages
        @return: bool or raise an Exception
        """
        for lang in languages:
            if lang == 'auto':
                continue
            if lang not in self._languages.keys() and lang not in self._languages.values():
                raise LanguageNotSupportedException(lang)
        return True

    def translate(self, text, **kwargs):
        """
        translate a text with google translate
        @param text: desired text to translate
        @return: str: translated text
        """
        if not self._validate_payload(text):
            return None

        text = text.strip()
        if self.payload_key:
            self._url_params[self.payload_key] = text

        response = requests.get(self.__base_url,
                                params=self._url_params,
                                headers={'User-agent': 'your bot 0.1'})

        status = response.status_code
        if status == 429:
            raise TooManyRequests()
        if status != 200:
            raise RequestError()

        soup = BeautifulSoup(response.text, 'html.parser')
        # look for the primary element first and fall back to the alternative query
        element = (soup.find(self._element_tag, self._element_query)
                   or soup.find(self._element_tag, self._alt_element_query))
        if not element:
            raise TranslationNotFound(text)

        return element.get_text(strip=True)

    def translate_file(self, path, **kwargs):
        """
        read a file and translate its whole content
        @param path: path to the target file
        @type path: str
        @param kwargs: additional args
        @return: str
        """
        with open(path) as source_file:
            content = source_file.read()
        return self.translate(text=content)

    def translate_sentences(self, sentences=None, **kwargs):
        """
        deprecated: translate several sentences one after another. Every call emits a
        DeprecationWarning and a log message pointing to translate_batch.

        @param sentences: list of sentences to translate
        @return: list of all translated sentences
        """
        warnings.warn("deprecated. Use the translate_batch function instead", DeprecationWarning, stacklevel=2)
        logging.warning("deprecated. Use the translate_batch function instead")
        if not sentences:
            raise NotValidPayload(sentences)

        return [self.translate(text=sentence) for sentence in sentences]

    def translate_batch(self, batch=None):
        """
        translate a list of texts, pausing two seconds after each of them
        @param batch: list of texts you want to translate
        @return: list of translations
        """
        if not batch:
            raise Exception("Enter your text list that you want to translate")

        translations = []
        for item in batch:
            translations.append(self.translate(item))
            sleep(2)

        return translations
|
||||||
|
|
||||||
|
|
||||||
|
# if __name__ == '__main__':
|
||||||
|
# for _ in range(10):
|
||||||
|
# txt = GoogleTranslator(source="en", target="ar").translate("Hello how are you")
|
||||||
|
# print("text: ", txt)
|
@ -0,0 +1,130 @@
|
|||||||
|
"""
|
||||||
|
linguee translator API
|
||||||
|
"""
|
||||||
|
|
||||||
|
from deep_translator.constants import BASE_URLS, LINGUEE_LANGUAGES_TO_CODES, LINGUEE_CODE_TO_LANGUAGE
|
||||||
|
from deep_translator.exceptions import (LanguageNotSupportedException,
|
||||||
|
TranslationNotFound,
|
||||||
|
NotValidPayload,
|
||||||
|
ElementNotFoundInGetRequest,
|
||||||
|
RequestError,
|
||||||
|
TooManyRequests)
|
||||||
|
from deep_translator.parent import BaseTranslator
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import requests
|
||||||
|
from requests.utils import requote_uri
|
||||||
|
|
||||||
|
|
||||||
|
class LingueeTranslator(BaseTranslator):
    """
    class that wraps functions, which use the linguee translator under the hood to translate word(s)
    """
    _languages = LINGUEE_LANGUAGES_TO_CODES
    supported_languages = list(_languages.keys())

    def __init__(self, source, target="en"):
        """
        @param source: source language to translate from
        @param target: target language to translate to
        """
        self.__base_url = BASE_URLS.get("LINGUEE")

        if self.is_language_supported(source, target):
            self._source, self._target = self._map_language_to_code(source.lower(), target.lower())

        super().__init__(base_url=self.__base_url,
                         source=self._source,
                         target=self._target,
                         element_tag='a',
                         element_query={'class': 'dictLink featured'},
                         payload_key=None,  # key of text in the url
                         )

    @staticmethod
    def get_supported_languages(as_dict=False):
        """
        return the supported languages by the linguee translator
        @param as_dict: if True, the languages will be returned as a dictionary mapping languages to their abbreviations
        @return: list or dict
        """
        return LingueeTranslator.supported_languages if not as_dict else LingueeTranslator._languages

    def _map_language_to_code(self, *languages, **kwargs):
        """
        normalise the given languages for the linguee url. Despite the name of the function,
        the full language NAME is yielded in both cases: an abbreviation is mapped to its name,
        a name is passed through unchanged.
        @param languages: list of languages
        @return: mapped value of the language or raise an exception if the language is not supported
        """
        for language in languages:
            if language in self._languages.values():
                yield LINGUEE_CODE_TO_LANGUAGE[language]
            elif language in self._languages.keys():
                yield language
            else:
                raise LanguageNotSupportedException(language)

    def is_language_supported(self, *languages, **kwargs):
        """
        check if the language is supported by the translator
        @param languages: list of languages
        @return: bool or raise an Exception
        """
        for lang in languages:
            if lang not in self._languages.keys() and lang not in self._languages.values():
                raise LanguageNotSupportedException(lang)
        return True

    def translate(self, word, return_all=False, **kwargs):
        """
        function that uses linguee to translate a word
        @param word: word to translate
        @type word: str
        @param return_all: set to True to return all synonym of the translated word
        @type return_all: bool
        @return: str: translated word
        """
        if self._validate_payload(word, max_chars=50):
            # %s-%s/translation/%s.html
            url = "{}{}-{}/translation/{}.html".format(self.__base_url, self._source, self._target, word)
            url = requote_uri(url)
            response = requests.get(url)

            if response.status_code == 429:
                raise TooManyRequests()

            if response.status_code != 200:
                raise RequestError()

            soup = BeautifulSoup(response.text, 'html.parser')
            elements = soup.find_all(self._element_tag, self._element_query)
            if not elements:
                raise ElementNotFoundInGetRequest(elements)

            filtered_elements = []
            for el in elements:
                try:
                    # text of the nested "placeholder" span, removed from the link text below
                    pronoun = el.find('span', {'class': 'placeholder'}).get_text(strip=True)
                except AttributeError:
                    # no such span inside this element
                    pronoun = ''
                filtered_elements.append(el.get_text(strip=True).replace(pronoun, ''))

            if not filtered_elements:
                raise TranslationNotFound(word)

            return filtered_elements if return_all else filtered_elements[0]

    def translate_words(self, words, **kwargs):
        """
        translate a batch of words together by providing them in a list
        @param words: list of words you want to translate
        @param kwargs: additional args, passed on to translate for every word
        @return: list of translated words
        """
        if not words:
            raise NotValidPayload(words)

        # translate() names its first parameter "word"; the former keyword
        # "payload" made every call fail with a TypeError
        return [self.translate(word=word, **kwargs) for word in words]
|
||||||
|
|
@ -0,0 +1,174 @@
|
|||||||
|
"""
|
||||||
|
mymemory translator API
|
||||||
|
"""
|
||||||
|
import logging
|
||||||
|
import warnings
|
||||||
|
|
||||||
|
from deep_translator.constants import BASE_URLS, GOOGLE_LANGUAGES_TO_CODES
|
||||||
|
from deep_translator.exceptions import (NotValidPayload,
|
||||||
|
TranslationNotFound,
|
||||||
|
LanguageNotSupportedException,
|
||||||
|
RequestError,
|
||||||
|
TooManyRequests)
|
||||||
|
from deep_translator.parent import BaseTranslator
|
||||||
|
import requests
|
||||||
|
from time import sleep
|
||||||
|
|
||||||
|
|
||||||
|
class MyMemoryTranslator(BaseTranslator):
|
||||||
|
"""
|
||||||
|
class that uses the mymemory translator to translate texts
|
||||||
|
"""
|
||||||
|
_languages = GOOGLE_LANGUAGES_TO_CODES
|
||||||
|
supported_languages = list(_languages.keys())
|
||||||
|
|
||||||
|
def __init__(self, source="auto", target="en", **kwargs):
    """
    @param source: language to translate from, as full name or abbreviation ('auto' is accepted as well)
    @param target: language to translate to, as full name or abbreviation
    @param kwargs: only the optional 'email' entry is read here
    """
    self.__base_url = BASE_URLS.get("MYMEMORY")
    if self.is_language_supported(source, target):
        codes = self._map_language_to_code(source.lower(), target.lower())
        self._source, self._target = codes
        # NOTE(review): 'auto' is swapped for the literal 'Lao'; the reason is not visible here, confirm
        if self._source == 'auto':
            self._source = 'Lao'

    self.email = kwargs.get('email', None)

    language_pair = '{}|{}'.format(self._source, self._target)
    super(MyMemoryTranslator, self).__init__(base_url=self.__base_url,
                                             source=self._source,
                                             target=self._target,
                                             payload_key='q',
                                             langpair=language_pair)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_supported_languages(as_dict=False):
|
||||||
|
"""
|
||||||
|
return the supported languages by the mymemory translator
|
||||||
|
@param as_dict: if True, the languages will be returned as a dictionary mapping languages to their abbreviations
|
||||||
|
@return: list or dict
|
||||||
|
"""
|
||||||
|
return MyMemoryTranslator.supported_languages if not as_dict else MyMemoryTranslator._languages
|
||||||
|
|
||||||
|
def _map_language_to_code(self, *languages):
|
||||||
|
"""
|
||||||
|
map language to its corresponding code (abbreviation) if the language was passed by its full name by the user
|
||||||
|
@param languages: list of languages
|
||||||
|
@return: mapped value of the language or raise an exception if the language is not supported
|
||||||
|
"""
|
||||||
|
for language in languages:
|
||||||
|
if language in self._languages.values() or language == 'auto':
|
||||||
|
yield language
|
||||||
|
elif language in self._languages.keys():
|
||||||
|
yield self._languages[language]
|
||||||
|
else:
|
||||||
|
raise LanguageNotSupportedException(language)
|
||||||
|
|
||||||
|
def is_language_supported(self, *languages):
|
||||||
|
"""
|
||||||
|
check if the language is supported by the translator
|
||||||
|
@param languages: list of languages
|
||||||
|
@return: bool or raise an Exception
|
||||||
|
"""
|
||||||
|
for lang in languages:
|
||||||
|
if lang != 'auto' and lang not in self._languages.keys():
|
||||||
|
if lang != 'auto' and lang not in self._languages.values():
|
||||||
|
raise LanguageNotSupportedException(lang)
|
||||||
|
return True
|
||||||
|
|
||||||
|
def translate(self, text, return_all=False, **kwargs):
|
||||||
|
"""
|
||||||
|
function that uses the mymemory translator to translate a text
|
||||||
|
@param text: desired text to translate
|
||||||
|
@type text: str
|
||||||
|
@param return_all: set to True to return all synonym/similars of the translated text
|
||||||
|
@return: str or list
|
||||||
|
"""
|
||||||
|
|
||||||
|
if self._validate_payload(text, max_chars=500):
|
||||||
|
text = text.strip()
|
||||||
|
|
||||||
|
if self.payload_key:
|
||||||
|
self._url_params[self.payload_key] = text
|
||||||
|
if self.email:
|
||||||
|
self._url_params['de'] = self.email
|
||||||
|
|
||||||
|
response = requests.get(self.__base_url,
|
||||||
|
params=self._url_params,
|
||||||
|
headers=self.headers)
|
||||||
|
|
||||||
|
if response.status_code == 429:
|
||||||
|
raise TooManyRequests()
|
||||||
|
if response.status_code != 200:
|
||||||
|
raise RequestError()
|
||||||
|
|
||||||
|
data = response.json()
|
||||||
|
if not data:
|
||||||
|
TranslationNotFound(text)
|
||||||
|
|
||||||
|
translation = data.get('responseData').get('translatedText')
|
||||||
|
if translation:
|
||||||
|
return translation
|
||||||
|
|
||||||
|
elif not translation:
|
||||||
|
all_matches = data.get('matches')
|
||||||
|
matches = (match['translation'] for match in all_matches)
|
||||||
|
next_match = next(matches)
|
||||||
|
return next_match if not return_all else list(all_matches)
|
||||||
|
|
||||||
|
def translate_sentences(self, sentences=None, **kwargs):
|
||||||
|
"""
|
||||||
|
translate many sentences together. This makes sense if you have sentences with different languages
|
||||||
|
and you want to translate all to unified language. This is handy because it detects
|
||||||
|
automatically the language of each sentence and then translate it.
|
||||||
|
|
||||||
|
@param sentences: list of sentences to translate
|
||||||
|
@return: list of all translated sentences
|
||||||
|
"""
|
||||||
|
warn_msg = "deprecated. Use the translate_batch function instead"
|
||||||
|
warnings.warn(warn_msg, DeprecationWarning, stacklevel=2)
|
||||||
|
logging.warning(warn_msg)
|
||||||
|
if not sentences:
|
||||||
|
raise NotValidPayload(sentences)
|
||||||
|
|
||||||
|
translated_sentences = []
|
||||||
|
try:
|
||||||
|
for sentence in sentences:
|
||||||
|
translated = self.translate(text=sentence, **kwargs)
|
||||||
|
translated_sentences.append(translated)
|
||||||
|
|
||||||
|
return translated_sentences
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
raise e
|
||||||
|
|
||||||
|
def translate_file(self, path, **kwargs):
|
||||||
|
"""
|
||||||
|
translate directly from file
|
||||||
|
@param path: path to the target file
|
||||||
|
@type path: str
|
||||||
|
@param kwargs: additional args
|
||||||
|
@return: str
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
with open(path) as f:
|
||||||
|
text = f.read()
|
||||||
|
|
||||||
|
return self.translate(text=text)
|
||||||
|
except Exception as e:
|
||||||
|
raise e
|
||||||
|
|
||||||
|
def translate_batch(self, batch=None):
|
||||||
|
"""
|
||||||
|
translate a list of texts
|
||||||
|
@param batch: list of texts you want to translate
|
||||||
|
@return: list of translations
|
||||||
|
"""
|
||||||
|
if not batch:
|
||||||
|
raise Exception("Enter your text list that you want to translate")
|
||||||
|
|
||||||
|
arr = []
|
||||||
|
for text in batch:
|
||||||
|
translated = self.translate(text)
|
||||||
|
arr.append(translated)
|
||||||
|
sleep(2)
|
||||||
|
|
||||||
|
return arr
|
@ -0,0 +1,71 @@
|
|||||||
|
"""parent translator class"""
|
||||||
|
|
||||||
|
from deep_translator.exceptions import NotValidPayload, NotValidLength
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
|
||||||
|
|
||||||
|
class BaseTranslator(ABC):
    """
    Abstract class that serve as a parent translator for other different translators
    """
    def __init__(self,
                 base_url=None,
                 source="auto",
                 target="en",
                 payload_key=None,
                 element_tag=None,
                 element_query=None,
                 **url_params):
        """
        @param base_url: url of the translation service
        @param source: source language to translate from
        @param target: target language to translate to
        @param payload_key: name of the url parameter that carries the text, if any
        @param element_tag: html tag stored for subclasses that parse the response
        @param element_query: attribute query stored for subclasses that parse the response
        @param url_params: any further keyword is kept as a url parameter
        """
        self.__base_url = base_url
        self._source = source
        self._target = target
        self._url_params = url_params
        self._element_tag = element_tag
        self._element_query = element_query
        self.payload_key = payload_key
        self.headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) '
                                      'AppleWebit/535.19'
                                      '(KHTML, like Gecko) Chrome/18.0.1025.168 Safari/535.19'}
        super(BaseTranslator, self).__init__()

    @staticmethod
    def _validate_payload(payload, min_chars=1, max_chars=5000):
        """
        validate the target text to translate
        @param payload: text to translate
        @param min_chars: minimum number of characters allowed
        @param max_chars: upper limit on the number of characters (exclusive)
        @return: True if the payload is valid
        @raise NotValidPayload: if payload is empty or not a string
        @raise NotValidLength: if the length of payload is outside the accepted range
        """
        if not payload or not isinstance(payload, str):
            raise NotValidPayload(payload)
        if not BaseTranslator.__check_length(payload, min_chars, max_chars):
            raise NotValidLength(payload, min_chars, max_chars)
        return True

    @staticmethod
    def __check_length(payload, min_chars, max_chars):
        """
        check length of the provided target text to translate
        @param payload: text to translate
        @param min_chars: minimum characters allowed (inclusive)
        @param max_chars: upper limit (exclusive, as before)
        @return: bool
        """
        # the lower bound is inclusive so that a payload of exactly min_chars
        # characters (e.g. a single letter with the default of 1) is accepted
        return min_chars <= len(payload) < max_chars

    @abstractmethod
    def translate(self, text, **kwargs):
        """
        translate a text using a translator under the hood and return the translated text
        @param text: text to translate
        @param kwargs: additional arguments
        @return: str
        @raise NotImplementedError: always, subclasses have to provide the implementation
        """
        # NotImplemented is a non callable constant; the exception is what is meant here
        raise NotImplementedError('You need to implement the translate method!')
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -0,0 +1,136 @@
|
|||||||
|
"""
|
||||||
|
pons translator API
|
||||||
|
"""
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import requests
|
||||||
|
from deep_translator.constants import BASE_URLS, PONS_LANGUAGES_TO_CODES, PONS_CODES_TO_LANGUAGES
|
||||||
|
from deep_translator.exceptions import (LanguageNotSupportedException,
|
||||||
|
TranslationNotFound,
|
||||||
|
NotValidPayload,
|
||||||
|
ElementNotFoundInGetRequest,
|
||||||
|
RequestError,
|
||||||
|
TooManyRequests)
|
||||||
|
from deep_translator.parent import BaseTranslator
|
||||||
|
from requests.utils import requote_uri
|
||||||
|
|
||||||
|
|
||||||
|
class PonsTranslator(BaseTranslator):
    """
    class that uses PONS translator to translate words
    """
    # mapping of language names to their codes
    _languages = PONS_LANGUAGES_TO_CODES
    supported_languages = list(_languages.keys())

    def __init__(self, source, target="english"):
        """
        @param source: source language to translate from (name or code)
        @param target: target language to translate to (name or code)
        @raise LanguageNotSupportedException: if source or target is not a known language
        """
        self.__base_url = BASE_URLS.get("PONS")

        # is_language_supported either returns True or raises
        if self.is_language_supported(source, target):
            self._source, self._target = self._map_language_to_code(source, target)

        super().__init__(base_url=self.__base_url,
                         source=self._source,
                         target=self._target,
                         payload_key=None,
                         element_tag='div',
                         element_query={"class": "target"}
                         )

    @staticmethod
    def get_supported_languages(as_dict=False):
        """
        return the supported languages by the pons translator
        @param as_dict: if True, the languages will be returned as a dictionary mapping languages to their abbreviations
        @return: list or dict
        """
        return PonsTranslator.supported_languages if not as_dict else PonsTranslator._languages

    def _map_language_to_code(self, *languages, **kwargs):
        """
        normalize every language to the key used in the language table: a code is looked up
        in PONS_CODES_TO_LANGUAGES, a key of the table is yielded unchanged
        @param languages: list of languages
        @return: generator yielding the normalized value of each language
        @raise LanguageNotSupportedException: if a language is neither a known name nor a known code
        """
        for language in languages:
            if language in self._languages.values():
                yield PONS_CODES_TO_LANGUAGES[language]
            elif language in self._languages.keys():
                yield language
            else:
                raise LanguageNotSupportedException(language)

    def is_language_supported(self, *languages, **kwargs):
        """
        check if the language is supported by the translator
        @param languages: list of languages
        @return: True if every language is a known name or a known code
        @raise LanguageNotSupportedException: for the first language that is not supported
        """
        for lang in languages:
            if lang not in self._languages.keys() and lang not in self._languages.values():
                raise LanguageNotSupportedException(lang)
        return True

    def translate(self, word, return_all=False, **kwargs):
        """
        function that uses PONS to translate a word
        @param word: word to translate
        @type word: str
        @param return_all: set to True to return all synonym of the translated word
        @type return_all: bool
        @return: str: translated word, or a list of them if return_all is True
        @raise NotValidPayload: if word is empty or not a string
        @raise NotValidLength: if the length of word is outside the accepted range
        @raise TooManyRequests: if the server answers with status 429
        @raise RequestError: if the server answers with any other non 200 status
        @raise ElementNotFoundInGetRequest: if the page holds no target elements
        @raise TranslationNotFound: if no usable translation was extracted
        """
        # raises on invalid input, otherwise returns True
        self._validate_payload(word, max_chars=50)

        url = "{}{}-{}/{}".format(self.__base_url, self._source, self._target, word)
        url = requote_uri(url)
        response = requests.get(url)

        if response.status_code == 429:
            raise TooManyRequests()

        if response.status_code != 200:
            raise RequestError()

        soup = BeautifulSoup(response.text, 'html.parser')
        elements = soup.findAll(self._element_tag, self._element_query)

        if not elements:
            raise ElementNotFoundInGetRequest(word)

        # only links that point back to the reverse translation page are kept
        link_marker = "/translate/{}-{}/".format(self._target, self._source)
        filtered_elements = []
        for el in elements:
            temp = ''
            for e in el.findAll('a'):
                # an anchor without href yields None, which must not reach the 'in' test
                href = e.get('href') or ''
                if e.parent.name == 'div' and link_marker in href:
                    temp += e.get_text() + ' '
            filtered_elements.append(temp)

        word_list = [entry for entry in filtered_elements if entry and len(entry) > 1]

        if not word_list:
            raise TranslationNotFound(word)

        return word_list if return_all else word_list[0]

    def translate_words(self, words, **kwargs):
        """
        translate a batch of words together by providing them in a list
        @param words: list of words you want to translate
        @param kwargs: additional args passed on to translate for every word
        @return: list of translated words
        @raise NotValidPayload: if words is empty
        """
        if not words:
            raise NotValidPayload(words)

        # translate takes the text as 'word'; the former 'payload' keyword raised a TypeError
        return [self.translate(word=word, **kwargs) for word in words]
|
||||||
|
|
@ -0,0 +1,91 @@
|
|||||||
|
|
||||||
|
import requests
|
||||||
|
from requests.utils import requote_uri
|
||||||
|
from deep_translator.constants import BASE_URLS
|
||||||
|
from deep_translator.exceptions import (RequestError,
|
||||||
|
ServerException, TranslationNotFound, TooManyRequests)
|
||||||
|
|
||||||
|
|
||||||
|
class QCRI(object):
    """
    class that wraps functions, which use the QCRI translator under the hood to translate word(s)
    """

    def __init__(self, api_key=None):
        """
        @param api_key: your qcri api key. Get one for free here https://mt.qcri.org/api/v1/ref
        @raise ServerException: with code 401 if no api key is given
        """

        if not api_key:
            raise ServerException(401)
        self.__base_url = BASE_URLS.get("QCRI")

        self.api_key = api_key
        # logical name -> endpoint name that is formatted into the base url
        self.api_endpoints = {
            "get_languages": "getLanguagePairs",
            "get_domains": "getDomains",
            "translate": "translate",
        }

        # default query parameters used when a call passes none
        self.params = {
            "key": self.api_key
        }

    def _get(self, endpoint, params=None, return_text=True):
        """
        send a GET request to one of the api endpoints
        @param endpoint: key of self.api_endpoints
        @param params: query parameters, self.params is used if empty
        @param return_text: if True the body text is returned, otherwise the response object
        @return: str or the response object
        """
        if not params:
            params = self.params
        res = requests.get(self.__base_url.format(endpoint=self.api_endpoints[endpoint]), params=params)
        return res.text if return_text else res

    def get_supported_languages(self):
        """
        @return: body text of the getLanguagePairs endpoint
        """
        return self._get("get_languages")

    @property
    def languages(self):
        return self.get_supported_languages()

    def get_domains(self):
        """
        @return: body text of the getDomains endpoint
        """
        return self._get("get_domains")

    @property
    def domains(self):
        return self.get_domains()

    def translate(self, source, target, domain, text):
        """
        translate a text
        @param source: source language
        @param target: target language
        @param domain: domain sent along with the request
        @param text: text to translate
        @return: value of 'translatedText' in the response
        @raise ServerException: with code 503 on a connection error, otherwise with the
        status code of any non 200 response
        @raise TranslationNotFound: if the response holds no translation
        """
        params = {
            "key": self.api_key,
            "langpair": "{}-{}".format(source, target),
            "domain": domain,
            "text": text
        }
        try:
            response = self._get("translate", params=params, return_text=False)
        # the connection error raised by requests does not derive from the builtin one
        except (ConnectionError, requests.exceptions.ConnectionError):
            raise ServerException(503)

        if response.status_code != 200:
            # the exception was previously created but never raised, so None was returned
            raise ServerException(response.status_code)

        translation = response.json().get("translatedText")
        if not translation:
            raise TranslationNotFound(text)
        return translation

    def translate_batch(self, source, target, domain, batch):
        """
        translate a batch of texts
        @param source: source language
        @param target: target language
        @param domain: domain sent along with every request
        @param batch: list of texts to translate
        @return: list of translations
        """
        return [self.translate(source, target, domain, text) for text in batch]
|
||||||
|
|
@ -0,0 +1 @@
|
|||||||
|
"""Unit test package for deep_translator."""
|
@ -0,0 +1,57 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
"""Tests for `deep_translator` package."""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from deep_translator import exceptions, GoogleTranslator
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def google_translator():
    """Provide a GoogleTranslator whose target language is English.

    Only the target is passed, the source is left at the default of the class.
    """
    translator = GoogleTranslator(target='en')
    return translator
|
||||||
|
|
||||||
|
|
||||||
|
def test_content(google_translator):
    """Translate a Korean word with the fixture and compare it to the expected English word."""
    translated = google_translator.translate(text='좋은')
    assert translated == "good"
|
||||||
|
|
||||||
|
|
||||||
|
def test_inputs():
    """Check that bad languages are rejected and that codes and names give the same result."""
    # every pair holds at least one value that is not a supported language
    for bad_source, bad_target in (("", ""), ("auto", "nothing")):
        with pytest.raises(exceptions.LanguageNotSupportedException):
            GoogleTranslator(source=bad_source, target=bad_target)

    # test abbreviations and languages
    by_code = GoogleTranslator("en", "fr")
    by_name = GoogleTranslator("english", "french")
    assert by_code._source == by_name._source
    assert by_code._target == by_name._target
|
||||||
|
|
||||||
|
|
||||||
|
def test_payload(google_translator):
    """Check that empty or non string payloads and an overlong text are rejected."""
    for invalid in ("", 123, {}, []):
        with pytest.raises(exceptions.NotValidPayload):
            google_translator.translate(text=invalid)

    too_long = "a"*5001
    with pytest.raises(exceptions.NotValidLength):
        google_translator.translate(too_long)
|
||||||
|
|
||||||
|
#for _ in range(1):
|
||||||
|
#assert google_translator.translate(text='좋은') == "good"
|
@ -0,0 +1,49 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
"""Tests for `deep_translator` package."""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from deep_translator import exceptions, LingueeTranslator
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def linguee():
    """Provide a LingueeTranslator that translates from english to french."""
    translator = LingueeTranslator(source="english", target='french')
    return translator
|
||||||
|
|
||||||
|
|
||||||
|
def test_content(linguee):
    """Translate a word with the fixture and check that something is returned."""
    translated = linguee.translate(word='good')
    assert translated is not None
|
||||||
|
|
||||||
|
|
||||||
|
def test_inputs():
    """Check that bad languages are rejected and that codes and names give the same result."""
    # every pair holds at least one value that is not a supported language
    for bad_source, bad_target in (("", ""), ("auto", "nothing")):
        with pytest.raises(exceptions.LanguageNotSupportedException):
            LingueeTranslator(source=bad_source, target=bad_target)

    by_code = LingueeTranslator("en", "fr")
    by_name = LingueeTranslator("english", "french")
    assert by_code._source == by_name._source
    assert by_code._target == by_name._target
|
||||||
|
|
||||||
|
|
||||||
|
def test_payload(linguee):
    """Check that empty or non string payloads and an overlong word are rejected."""
    for invalid in ("", 123, {}, []):
        with pytest.raises(exceptions.NotValidPayload):
            linguee.translate(invalid)

    too_long = "a"*51
    with pytest.raises(exceptions.NotValidLength):
        linguee.translate(too_long)
|
@ -0,0 +1,48 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
"""Tests for `deep_translator` package."""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from deep_translator import exceptions, MyMemoryTranslator
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def mymemory():
    """Provide a MyMemoryTranslator that translates from en to fr."""
    translator = MyMemoryTranslator(source="en", target='fr')
    return translator
|
||||||
|
|
||||||
|
|
||||||
|
def test_content(mymemory):
    """Translate a text with the fixture and check that something is returned."""
    translated = mymemory.translate(text='good')
    assert translated is not None
|
||||||
|
|
||||||
|
|
||||||
|
def test_inputs():
    """Check that bad languages are rejected and that codes and names give the same result."""
    # every pair holds at least one value that is not a supported language
    for bad_source, bad_target in (("", ""), ("auto", "nothing")):
        with pytest.raises(exceptions.LanguageNotSupportedException):
            MyMemoryTranslator(source=bad_source, target=bad_target)

    by_code = MyMemoryTranslator("en", "fr")
    by_name = MyMemoryTranslator("english", "french")
    assert by_code._source == by_name._source
    assert by_code._target == by_name._target
|
||||||
|
|
||||||
|
|
||||||
|
def test_payload(mymemory):
    """Check that empty or non string payloads and an overlong text are rejected."""
    for invalid in ("", 123, {}, []):
        with pytest.raises(exceptions.NotValidPayload):
            mymemory.translate(text=invalid)

    too_long = "a"*501
    with pytest.raises(exceptions.NotValidLength):
        mymemory.translate(text=too_long)
|
@ -0,0 +1,48 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
"""Tests for `deep_translator` package."""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from deep_translator import exceptions, PonsTranslator
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def pons():
    """Provide a PonsTranslator that translates from english to french."""
    translator = PonsTranslator(source="english", target='french')
    return translator
|
||||||
|
|
||||||
|
|
||||||
|
def test_content(pons):
    """Translate a word with the fixture and check that something is returned."""
    translated = pons.translate(word='good')
    assert translated is not None
|
||||||
|
|
||||||
|
|
||||||
|
def test_inputs():
    """Check that bad languages are rejected and that codes and names give the same result."""
    # every pair holds at least one value that is not a supported language
    for bad_source, bad_target in (("", ""), ("auto", "nothing")):
        with pytest.raises(exceptions.LanguageNotSupportedException):
            PonsTranslator(source=bad_source, target=bad_target)

    by_code = PonsTranslator("en", "fr")
    by_name = PonsTranslator("english", "french")
    assert by_code._source == by_name._source
    assert by_code._target == by_name._target
|
||||||
|
|
||||||
|
|
||||||
|
def test_payload(pons):
    """Check that empty or non string payloads and an overlong word are rejected."""
    for invalid in ("", 123, {}, []):
        with pytest.raises(exceptions.NotValidPayload):
            pons.translate(invalid)

    too_long = "a" * 51
    with pytest.raises(exceptions.NotValidLength):
        pons.translate(too_long)
|
@ -0,0 +1,3 @@
|
|||||||
|
"""
|
||||||
|
utilities
|
||||||
|
"""
|
@ -0,0 +1,132 @@
|
|||||||
|
"""
|
||||||
|
Yandex translator API
|
||||||
|
"""
|
||||||
|
import requests
|
||||||
|
from requests import exceptions
|
||||||
|
from deep_translator.constants import BASE_URLS
|
||||||
|
from deep_translator.exceptions import (RequestError,
|
||||||
|
ServerException, TranslationNotFound, TooManyRequests)
|
||||||
|
|
||||||
|
|
||||||
|
class YandexTranslator(object):
    """
    class that wraps functions, which use the yandex translator under the hood to translate word(s)
    """

    def __init__(self, api_key=None):
        """
        @param api_key: your yandex api key
        @raise ServerException: with code 401 if no api key is given
        """
        if not api_key:
            raise ServerException(401)
        self.__base_url = BASE_URLS.get("YANDEX")

        self.api_key = api_key
        self.api_version = "v1.5"
        self.api_endpoints = {
            "langs": "getLangs",
            "detect": "detect",
            "translate": "translate",
        }

    def get_supported_languages(self):
        """
        @return: set holding the part before the first '-' of every entry in self.dirs
        """
        return set(x.split("-")[0] for x in self.dirs)

    @property
    def languages(self):
        return self.get_supported_languages()

    @property
    def dirs(self, proxies=None):
        """
        fetch the 'dirs' entry of the getLangs endpoint
        @param proxies: always None when read as a property, kept for compatibility
        @return: value of 'dirs' in the response
        @raise ServerException: with code 503 on a connection error, otherwise with the
        status code of any non 200 response
        """
        try:
            url = self.__base_url.format(version=self.api_version, endpoint="getLangs")
            response = requests.get(url, params={"key": self.api_key}, proxies=proxies)
        except requests.exceptions.ConnectionError:
            raise ServerException(503)

        # check the status before decoding, an error page does not have to be json
        if response.status_code != 200:
            raise ServerException(response.status_code)
        return response.json().get("dirs")

    def detect(self, text, proxies=None):
        """
        detect the language of a text
        @param text: text whose language should be detected
        @param proxies: proxies passed on to requests
        @return: value of 'lang' in the response
        @raise ServerException: with code 503 on a connection error, with the http status if
        the body is not valid json, with code 501 if no language is returned
        @raise RequestError: if the 'code' entry of the response is not 200
        """
        params = {
            "text": text,
            "format": "plain",
            "key": self.api_key,
        }
        try:
            url = self.__base_url.format(version=self.api_version, endpoint="detect")
            response = requests.post(url, data=params, proxies=proxies)
        # the connection error raised by requests does not derive from the builtin one
        except (ConnectionError, requests.exceptions.ConnectionError):
            raise ServerException(503)

        try:
            data = response.json()
        except ValueError:
            raise ServerException(response.status_code)

        # look at the code first, an error answer does not have to carry 'lang'
        if data.get('code') != 200:
            raise RequestError()

        language = data.get('lang')
        if not language:
            raise ServerException(501)
        return language

    def translate(self, source, target, text, proxies=None):
        """
        translate a text
        @param source: source language, 'auto' sends only the target as 'lang'
        @param target: target language
        @param text: text to translate
        @param proxies: proxies passed on to requests
        @return: value of 'text' in the response
        @raise ServerException: with code 503 on a connection error, otherwise with the
        'code' entry of any non 200 answer
        @raise TooManyRequests: if the 'code' entry of the response is 429
        @raise TranslationNotFound: if the response holds no translation
        """
        params = {
            "text": text,
            "format": "plain",
            "lang": target if source == "auto" else "{}-{}".format(source, target),
            "key": self.api_key
        }
        try:
            url = self.__base_url.format(version=self.api_version, endpoint="translate")
            response = requests.post(url, data=params, proxies=proxies)
        # the connection error raised by requests does not derive from the builtin one
        except (ConnectionError, requests.exceptions.ConnectionError):
            raise ServerException(503)

        data = response.json()
        code = data.get('code')

        if code == 429:
            raise TooManyRequests()

        if code != 200:
            raise ServerException(code)

        translation = data.get('text')
        if not translation:
            # pass the text like the other translators do
            raise TranslationNotFound(text)

        return translation

    def translate_file(self, source, target, path):
        """
        translate from a file
        @param source: source language
        @param target: target language
        @param path: path to file
        @return: translated text
        """
        with open(path) as f:
            text = f.read()

        return self.translate(source, target, text)

    def translate_batch(self, source, target, batch):
        """
        translate a batch of texts
        @param source: source language
        @param target: target language
        @param batch: list of texts to translate
        @return: list of translations
        """
        return [self.translate(source, target, text) for text in batch]
|
Loading…
Reference in new issue