You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
bazarr/libs/deep_translator/constants.py

642 lines
16 KiB

__copyright__ = "Copyright (C) 2020 Nidhal Baccouri"

# Environment-variable names, one per translation backend, listed
# alphabetically by constant name. Only the names are defined here; the
# code that reads them lives outside this module (presumably to pick up
# API credentials -- confirm against the translator classes).
BAIDU_APPID_ENV_VAR = "BAIDU_APPID"
BAIDU_APPKEY_ENV_VAR = "BAIDU_APPKEY"
DEEPL_ENV_VAR = "DEEPL_API_KEY"
LIBRE_ENV_VAR = "LIBRE_API_KEY"
MSFT_ENV_VAR = "MICROSOFT_API_KEY"
# NOTE(review): the value is "OPEN_API_KEY", not "OPENAI_API_KEY". It is
# kept verbatim because existing setups may already export this name.
OPEN_AI_ENV_VAR = "OPEN_API_KEY"
QCRI_ENV_VAR = "QCRI_API_KEY"
YANDEX_ENV_VAR = "YANDEX_API_KEY"
# Base endpoint of each translation backend, keyed by an upper-case
# backend identifier. Some entries contain "{version}" or "{endpoint}"
# placeholders (presumably filled in by the caller via str.format before
# the request is issued -- confirm against the translator classes).
BASE_URLS = {
    "GOOGLE_TRANSLATE": "https://translate.google.com/m",
    "PONS": "https://en.pons.com/translate/",
    "YANDEX": "https://translate.yandex.net/api/{version}/tr.json/{endpoint}",
    "LINGUEE": "https://www.linguee.com/",
    # HTTPS instead of plain HTTP, like every other entry in this table, so
    # the text being translated is not sent in clear text. Host and path are
    # unchanged.
    "MYMEMORY": "https://api.mymemory.translated.net/get",
    "QCRI": "https://mt.qcri.org/api/v1/{endpoint}?",
    "DEEPL": "https://api.deepl.com/{version}/",
    "DEEPL_FREE": "https://api-free.deepl.com/{version}/",
    "MICROSOFT_TRANSLATE": "https://api.cognitive.microsofttranslator.com/translate?api-version=3.0",
    "PAPAGO": "https://papago.naver.com/",
    "PAPAGO_API": "https://openapi.naver.com/v1/papago/n2mt",
    "LIBRE": "https://libretranslate.com/",
    "LIBRE_FREE": "https://libretranslate.de/",
    "BAIDU": "https://fanyi-api.baidu.com/api/trans/vip/translate",
}
# Google table: lowercase English language name -> language code.
# Entries are kept in alphabetical order by name.
# NOTE(review): "hebrew" -> "iw" and "javanese" -> "jw" here, whereas
# MY_MEMORY_LANGUAGES_TO_CODES uses "he-IL" and "jv-ID"; presumably
# backend-specific -- confirm before unifying.
GOOGLE_LANGUAGES_TO_CODES = {
    "afrikaans": "af",
    "albanian": "sq",
    "amharic": "am",
    "arabic": "ar",
    "armenian": "hy",
    "assamese": "as",
    "aymara": "ay",
    "azerbaijani": "az",
    "bambara": "bm",
    "basque": "eu",
    "belarusian": "be",
    "bengali": "bn",
    "bhojpuri": "bho",
    "bosnian": "bs",
    "bulgarian": "bg",
    "catalan": "ca",
    "cebuano": "ceb",
    "chichewa": "ny",
    "chinese (simplified)": "zh-CN",
    "chinese (traditional)": "zh-TW",
    "corsican": "co",
    "croatian": "hr",
    "czech": "cs",
    "danish": "da",
    "dhivehi": "dv",
    "dogri": "doi",
    "dutch": "nl",
    "english": "en",
    "esperanto": "eo",
    "estonian": "et",
    "ewe": "ee",
    "filipino": "tl",
    "finnish": "fi",
    "french": "fr",
    "frisian": "fy",
    "galician": "gl",
    "georgian": "ka",
    "german": "de",
    "greek": "el",
    "guarani": "gn",
    "gujarati": "gu",
    "haitian creole": "ht",
    "hausa": "ha",
    "hawaiian": "haw",
    "hebrew": "iw",
    "hindi": "hi",
    "hmong": "hmn",
    "hungarian": "hu",
    "icelandic": "is",
    "igbo": "ig",
    "ilocano": "ilo",
    "indonesian": "id",
    "irish": "ga",
    "italian": "it",
    "japanese": "ja",
    "javanese": "jw",
    "kannada": "kn",
    "kazakh": "kk",
    "khmer": "km",
    "kinyarwanda": "rw",
    "konkani": "gom",
    "korean": "ko",
    "krio": "kri",
    "kurdish (kurmanji)": "ku",
    "kurdish (sorani)": "ckb",
    "kyrgyz": "ky",
    "lao": "lo",
    "latin": "la",
    "latvian": "lv",
    "lingala": "ln",
    "lithuanian": "lt",
    "luganda": "lg",
    "luxembourgish": "lb",
    "macedonian": "mk",
    "maithili": "mai",
    "malagasy": "mg",
    "malay": "ms",
    "malayalam": "ml",
    "maltese": "mt",
    "maori": "mi",
    "marathi": "mr",
    "meiteilon (manipuri)": "mni-Mtei",
    "mizo": "lus",
    "mongolian": "mn",
    "myanmar": "my",
    "nepali": "ne",
    "norwegian": "no",
    "odia (oriya)": "or",
    "oromo": "om",
    "pashto": "ps",
    "persian": "fa",
    "polish": "pl",
    "portuguese": "pt",
    "punjabi": "pa",
    "quechua": "qu",
    "romanian": "ro",
    "russian": "ru",
    "samoan": "sm",
    "sanskrit": "sa",
    "scots gaelic": "gd",
    "sepedi": "nso",
    "serbian": "sr",
    "sesotho": "st",
    "shona": "sn",
    "sindhi": "sd",
    "sinhala": "si",
    "slovak": "sk",
    "slovenian": "sl",
    "somali": "so",
    "spanish": "es",
    "sundanese": "su",
    "swahili": "sw",
    "swedish": "sv",
    "tajik": "tg",
    "tamil": "ta",
    "tatar": "tt",
    "telugu": "te",
    "thai": "th",
    "tigrinya": "ti",
    "tsonga": "ts",
    "turkish": "tr",
    "turkmen": "tk",
    "twi": "ak",
    "ukrainian": "uk",
    "urdu": "ur",
    "uyghur": "ug",
    "uzbek": "uz",
    "vietnamese": "vi",
    "welsh": "cy",
    "xhosa": "xh",
    "yiddish": "yi",
    "yoruba": "yo",
    "zulu": "zu",
}
# PONS table. Direction is code -> lowercase language name (the reverse of
# the *_LANGUAGES_TO_CODES tables in this module), as the constant's name
# indicates.
PONS_CODES_TO_LANGUAGES = {
    "ar": "arabic",
    "bg": "bulgarian",
    "zh-cn": "chinese",
    "cs": "czech",
    "da": "danish",
    "nl": "dutch",
    "en": "english",
    "fr": "french",
    "de": "german",
    "el": "greek",
    "hu": "hungarian",
    "it": "italian",
    "la": "latin",
    "no": "norwegian",
    "pl": "polish",
    "pt": "portuguese",
    "ru": "russian",
    "sl": "slovenian",
    "es": "spanish",
    "sv": "swedish",
    "tr": "turkish",
    "elv": "elvish",
}
# Linguee table: every language name maps to itself, so the names double
# as the "codes". Built from an ordered tuple to avoid repeating each name;
# the resulting insertion order is the tuple order.
# "serbian" (code "sr") and "croatian" (code "hr") remain disabled
# (commented out); the reason is not recorded here.
LINGUEE_LANGUAGES_TO_CODES = {
    language: language
    for language in (
        "maltese",
        "english",
        "german",
        "bulgarian",
        "polish",
        "portuguese",
        "hungarian",
        "romanian",
        "russian",
        # "serbian",
        "dutch",
        "slovakian",
        "greek",
        "slovenian",
        "danish",
        "italian",
        "spanish",
        "finnish",
        "chinese",
        "french",
        # "croatian",
        "czech",
        "laotian",
        "swedish",
        "latvian",
        "estonian",
        "japanese",
    )
}
# MyMemory table: lowercase language (or regional variant) name -> locale
# style code of the form "<language>[-<script>]-<REGION>".
# Entries are mostly alphabetical by name; "northern kurdish", "sesotho"
# and "urdu" sit out of alphabetical position and are kept where they are
# so that iteration order is unchanged.
MY_MEMORY_LANGUAGES_TO_CODES = {
    "acehnese": "ace-ID",
    "afrikaans": "af-ZA",
    "akan": "ak-GH",
    "albanian": "sq-AL",
    "amharic": "am-ET",
    "antigua and barbuda creole english": "aig-AG",
    "arabic": "ar-SA",
    "arabic egyptian": "ar-EG",
    "aragonese": "an-ES",
    "armenian": "hy-AM",
    "assamese": "as-IN",
    "asturian": "ast-ES",
    "austrian german": "de-AT",
    "awadhi": "awa-IN",
    "ayacucho quechua": "quy-PE",
    "azerbaijani": "az-AZ",
    "bahamas creole english": "bah-BS",
    "bajan": "bjs-BB",
    "balinese": "ban-ID",
    "balkan gipsy": "rm-RO",
    "bambara": "bm-ML",
    "banjar": "bjn-ID",
    "bashkir": "ba-RU",
    "basque": "eu-ES",
    "belarusian": "be-BY",
    "belgian french": "fr-BE",
    "bemba": "bem-ZM",
    "bengali": "bn-IN",
    "bhojpuri": "bho-IN",
    "bihari": "bh-IN",
    "bislama": "bi-VU",
    "borana": "gax-KE",
    "bosnian": "bs-BA",
    "bosnian (cyrillic)": "bs-Cyrl-BA",
    "breton": "br-FR",
    "buginese": "bug-ID",
    "bulgarian": "bg-BG",
    "burmese": "my-MM",
    "catalan": "ca-ES",
    "catalan valencian": "cav-ES",
    "cebuano": "ceb-PH",
    "central atlas tamazight": "tzm-MA",
    "central aymara": "ayr-BO",
    "central kanuri (latin script)": "knc-NG",
    "chadian arabic": "shu-TD",
    "chamorro": "ch-GU",
    "cherokee": "chr-US",
    "chhattisgarhi": "hne-IN",
    "chinese simplified": "zh-CN",
    "chinese trad. (hong kong)": "zh-HK",
    "chinese traditional": "zh-TW",
    "chinese traditional macau": "zh-MO",
    "chittagonian": "ctg-BD",
    "chokwe": "cjk-AO",
    "classical greek": "grc-GR",
    "comorian ngazidja": "zdj-KM",
    "coptic": "cop-EG",
    "crimean tatar": "crh-RU",
    "crioulo upper guinea": "pov-GW",
    "croatian": "hr-HR",
    "czech": "cs-CZ",
    "danish": "da-DK",
    "dari": "prs-AF",
    "dimli": "diq-TR",
    "dutch": "nl-NL",
    "dyula": "dyu-CI",
    "dzongkha": "dz-BT",
    "eastern yiddish": "ydd-US",
    "emakhuwa": "vmw-MZ",
    "english": "en-GB",
    "english australia": "en-AU",
    "english canada": "en-CA",
    "english india": "en-IN",
    "english ireland": "en-IE",
    "english new zealand": "en-NZ",
    "english singapore": "en-SG",
    "english south africa": "en-ZA",
    "english us": "en-US",
    "esperanto": "eo-EU",
    "estonian": "et-EE",
    "ewe": "ee-GH",
    "fanagalo": "fn-FNG",
    "faroese": "fo-FO",
    "fijian": "fj-FJ",
    "filipino": "fil-PH",
    "finnish": "fi-FI",
    "flemish": "nl-BE",
    "fon": "fon-BJ",
    "french": "fr-FR",
    "french canada": "fr-CA",
    "french swiss": "fr-CH",
    "friulian": "fur-IT",
    "fula": "ff-FUL",
    "galician": "gl-ES",
    "gamargu": "mfi-NG",
    "garo": "grt-IN",
    "georgian": "ka-GE",
    "german": "de-DE",
    "gilbertese": "gil-KI",
    "glavda": "glw-NG",
    "greek": "el-GR",
    "grenadian creole english": "gcl-GD",
    "guarani": "gn-PY",
    "gujarati": "gu-IN",
    "guyanese creole english": "gyn-GY",
    "haitian creole french": "ht-HT",
    "halh mongolian": "khk-MN",
    "hausa": "ha-NE",
    "hawaiian": "haw-US",
    "hebrew": "he-IL",
    "higi": "hig-NG",
    "hiligaynon": "hil-PH",
    "hill mari": "mrj-RU",
    "hindi": "hi-IN",
    "hmong": "hmn-CN",
    "hungarian": "hu-HU",
    "icelandic": "is-IS",
    "igbo ibo": "ibo-NG",
    "igbo ig": "ig-NG",
    "ilocano": "ilo-PH",
    "indonesian": "id-ID",
    "inuktitut greenlandic": "kl-GL",
    "irish gaelic": "ga-IE",
    "italian": "it-IT",
    "italian swiss": "it-CH",
    "jamaican creole english": "jam-JM",
    "japanese": "ja-JP",
    "javanese": "jv-ID",
    "jingpho": "kac-MM",
    "k'iche'": "quc-GT",
    "kabiyè": "kbp-TG",
    "kabuverdianu": "kea-CV",
    "kabylian": "kab-DZ",
    "kalenjin": "kln-KE",
    "kamba": "kam-KE",
    "kannada": "kn-IN",
    "kanuri": "kr-KAU",
    "karen": "kar-MM",
    "kashmiri (devanagari script)": "ks-IN",
    "kashmiri (arabic script)": "kas-IN",
    "kazakh": "kk-KZ",
    "khasi": "kha-IN",
    "khmer": "km-KH",
    "kikuyu kik": "kik-KE",
    "kikuyu ki": "ki-KE",
    "kimbundu": "kmb-AO",
    "kinyarwanda": "rw-RW",
    "kirundi": "rn-BI",
    "kisii": "guz-KE",
    "kongo": "kg-CG",
    "konkani": "kok-IN",
    "korean": "ko-KR",
    "northern kurdish": "kmr-TR",
    "kurdish sorani": "ckb-IQ",
    "kyrgyz": "ky-KG",
    "lao": "lo-LA",
    "latgalian": "ltg-LV",
    "latin": "la-XN",
    "latvian": "lv-LV",
    "ligurian": "lij-IT",
    "limburgish": "li-NL",
    "lingala": "ln-LIN",
    "lithuanian": "lt-LT",
    "lombard": "lmo-IT",
    "luba-kasai": "lua-CD",
    "luganda": "lg-UG",
    "luhya": "luy-KE",
    "luo": "luo-KE",
    "luxembourgish": "lb-LU",
    "maa": "mas-KE",
    "macedonian": "mk-MK",
    "magahi": "mag-IN",
    "maithili": "mai-IN",
    "malagasy": "mg-MG",
    "malay": "ms-MY",
    "malayalam": "ml-IN",
    "maldivian": "dv-MV",
    "maltese": "mt-MT",
    "mandara": "mfi-CM",
    "manipuri": "mni-IN",
    "manx gaelic": "gv-IM",
    "maori": "mi-NZ",
    "marathi": "mr-IN",
    "margi": "mrt-NG",
    "mari": "mhr-RU",
    "marshallese": "mh-MH",
    "mende": "men-SL",
    "meru": "mer-KE",
    "mijikenda": "nyf-KE",
    "minangkabau": "min-ID",
    "mizo": "lus-IN",
    "mongolian": "mn-MN",
    "montenegrin": "sr-ME",
    "morisyen": "mfe-MU",
    "moroccan arabic": "ar-MA",
    "mossi": "mos-BF",
    "ndau": "ndc-MZ",
    "ndebele": "nr-ZA",
    "nepali": "ne-NP",
    "nigerian fulfulde": "fuv-NG",
    "niuean": "niu-NU",
    "north azerbaijani": "azj-AZ",
    # NOTE(review): GOOGLE_LANGUAGES_TO_CODES maps "sesotho" to "st" and
    # "sepedi" to "nso", and this table also has "sotho southern": "st-LS";
    # confirm this name/code pairing.
    "sesotho": "nso-ZA",
    "northern uzbek": "uzn-UZ",
    "norwegian bokmål": "nb-NO",
    "norwegian nynorsk": "nn-NO",
    "nuer": "nus-SS",
    "nyanja": "ny-MW",
    "occitan": "oc-FR",
    "occitan aran": "oc-ES",
    "odia": "or-IN",
    "oriya": "ory-IN",
    "urdu": "ur-PK",
    "palauan": "pau-PW",
    "pali": "pi-IN",
    "pangasinan": "pag-PH",
    "papiamentu": "pap-CW",
    "pashto": "ps-PK",
    "persian": "fa-IR",
    "pijin": "pis-SB",
    "plateau malagasy": "plt-MG",
    "polish": "pl-PL",
    "portuguese": "pt-PT",
    "portuguese brazil": "pt-BR",
    "potawatomi": "pot-US",
    "punjabi": "pa-IN",
    "punjabi (pakistan)": "pnb-PK",
    "quechua": "qu-PE",
    "rohingya": "rhg-MM",
    "rohingyalish": "rhl-MM",
    "romanian": "ro-RO",
    "romansh": "roh-CH",
    "rundi": "run-BI",
    "russian": "ru-RU",
    "saint lucian creole french": "acf-LC",
    "samoan": "sm-WS",
    "sango": "sg-CF",
    "sanskrit": "sa-IN",
    "santali": "sat-IN",
    "sardinian": "sc-IT",
    "scots gaelic": "gd-GB",
    "sena": "seh-ZW",
    "serbian cyrillic": "sr-Cyrl-RS",
    "serbian latin": "sr-Latn-RS",
    "seselwa creole french": "crs-SC",
    "setswana (south africa)": "tn-ZA",
    "shan": "shn-MM",
    "shona": "sn-ZW",
    "sicilian": "scn-IT",
    "silesian": "szl-PL",
    "sindhi snd": "snd-PK",
    "sindhi sd": "sd-PK",
    "sinhala": "si-LK",
    "slovak": "sk-SK",
    "slovenian": "sl-SI",
    "somali": "so-SO",
    "sotho southern": "st-LS",
    "south azerbaijani": "azb-AZ",
    "southern pashto": "pbt-PK",
    "southwestern dinka": "dik-SS",
    "spanish": "es-ES",
    "spanish argentina": "es-AR",
    "spanish colombia": "es-CO",
    "spanish latin america": "es-419",
    "spanish mexico": "es-MX",
    "spanish united states": "es-US",
    "sranan tongo": "srn-SR",
    "standard latvian": "lvs-LV",
    "standard malay": "zsm-MY",
    "sundanese": "su-ID",
    "swahili": "sw-KE",
    "swati": "ss-SZ",
    "swedish": "sv-SE",
    "swiss german": "de-CH",
    "syriac (aramaic)": "syc-TR",
    "tagalog": "tl-PH",
    "tahitian": "ty-PF",
    "tajik": "tg-TJ",
    "tamashek (tuareg)": "tmh-DZ",
    "tamasheq": "taq-ML",
    "tamil india": "ta-IN",
    "tamil sri lanka": "ta-LK",
    "taroko": "trv-TW",
    "tatar": "tt-RU",
    "telugu": "te-IN",
    "tetum": "tet-TL",
    "thai": "th-TH",
    "tibetan": "bo-CN",
    "tigrinya": "ti-ET",
    "tok pisin": "tpi-PG",
    "tokelauan": "tkl-TK",
    "tongan": "to-TO",
    "tosk albanian": "als-AL",
    "tsonga": "ts-ZA",
    "tswa": "tsc-MZ",
    "tswana": "tn-BW",
    "tumbuka": "tum-MW",
    "turkish": "tr-TR",
    "turkmen": "tk-TM",
    "tuvaluan": "tvl-TV",
    "twi": "tw-GH",
    "udmurt": "udm-RU",
    "ukrainian": "uk-UA",
    "uma": "ppk-ID",
    "umbundu": "umb-AO",
    "uyghur uig": "uig-CN",
    "uyghur ug": "ug-CN",
    "uzbek": "uz-UZ",
    "venetian": "vec-IT",
    "vietnamese": "vi-VN",
    "vincentian creole english": "svc-VC",
    "virgin islands creole english": "vic-US",
    "wallisian": "wls-WF",
    "waray (philippines)": "war-PH",
    "welsh": "cy-GB",
    "west central oromo": "gaz-ET",
    "western persian": "pes-IR",
    "wolof": "wo-SN",
    "xhosa": "xh-ZA",
    "yiddish": "yi-YD",
    "yoruba": "yo-NG",
    "zulu": "zu-ZA",
}
# DeepL table: lowercase language name -> two-letter language code.
# Entries are ordered by code, not by name, except "chinese"/"zh", which
# comes last.
DEEPL_LANGUAGE_TO_CODE = {
    "bulgarian": "bg",
    "czech": "cs",
    "danish": "da",
    "german": "de",
    "greek": "el",
    "english": "en",
    "spanish": "es",
    "estonian": "et",
    "finnish": "fi",
    "french": "fr",
    "hungarian": "hu",
    "indonesian": "id",
    "italian": "it",
    "japanese": "ja",
    "lithuanian": "lt",
    "latvian": "lv",
    "dutch": "nl",
    "polish": "pl",
    "portuguese": "pt",
    "romanian": "ro",
    "russian": "ru",
    "slovak": "sk",
    "slovenian": "sl",
    "swedish": "sv",
    "turkish": "tr",
    "ukrainian": "uk",
    "chinese": "zh",
}
# Papago table.
# NOTE(review): despite the *_LANGUAGE_TO_CODE name, keys here are language
# codes and values are capitalised display names (the reverse of
# DEEPL_LANGUAGE_TO_CODE), and "id" maps to "Indonesia" as written.
# Confirm what the consumer expects before changing either.
PAPAGO_LANGUAGE_TO_CODE = {
    "ko": "Korean",
    "en": "English",
    "ja": "Japanese",
    "zh-CN": "Chinese",
    "zh-TW": "Chinese traditional",
    "es": "Spanish",
    "fr": "French",
    "vi": "Vietnamese",
    "th": "Thai",
    "id": "Indonesia",
}
# QCRI table: capitalised language name -> two-letter code. Unlike most
# tables in this module, the keys are capitalised, so lookups are
# case-sensitive on "Arabic"/"English"/"Spanish".
QCRI_LANGUAGE_TO_CODE = {
    "Arabic": "ar",
    "English": "en",
    "Spanish": "es",
}
# LibreTranslate table: capitalised language name -> two-letter code.
# "English" is listed first; the remaining entries are alphabetical.
LIBRE_LANGUAGES_TO_CODES = {
    "English": "en",
    "Arabic": "ar",
    "Chinese": "zh",
    "French": "fr",
    "German": "de",
    "Hindi": "hi",
    "Indonesian": "id",
    "Irish": "ga",
    "Italian": "it",
    "Japanese": "ja",
    "Korean": "ko",
    "Polish": "pl",
    "Portuguese": "pt",
    "Russian": "ru",
    "Spanish": "es",
    "Turkish": "tr",
    "Vietnamese": "vi",
}
# Baidu table: lowercase language name -> code. The codes are a mix of
# two- and three-letter identifiers (e.g. "en" vs. "fra") and differ from
# the other tables in this module (e.g. "japanese" is "jp" here), so they
# must not be swapped for codes from another backend's table.
BAIDU_LANGUAGE_TO_CODE = {
    "arabic": "ara",
    "bulgarian": "bul",
    "chinese (classical)": "wyw",
    "chinese (simplified)": "zh",
    "chinese (traditional)": "cht",
    "czech": "cs",
    "danish": "dan",
    "dutch": "nl",
    "english": "en",
    "estonian": "est",
    "finnish": "fin",
    "french": "fra",
    "german": "de",
    "greek": "el",
    "hungarian": "hu",
    "italian": "it",
    "japanese": "jp",
    "korean": "kor",
    "polish": "pl",
    "portuguese": "pt",
    "romanian": "ro",
    "russian": "ru",
    "slovenian": "slo",
    "spanish": "spa",
    "swedish": "swe",
    "thai": "th",
    "vietnamese": "vie",
    "yueyu": "yue",
}