mirror of
https://github.com/morpheus65535/bazarr.git
synced 2025-04-24 06:37:16 -04:00
Updated the deep-translator module and made some fixes to support translation to Chinese. There's still a bug in this module that prevents it, but once it's fixed, it should be fine.
This commit is contained in:
parent
6a88596aab
commit
f0828959f3
26 changed files with 747 additions and 422 deletions
|
@ -434,10 +434,14 @@ def subtitles_apply_mods(language, subtitle_path, mods):
|
|||
def translate_subtitles_file(video_path, source_srt_file, to_lang, forced, hi):
|
||||
# Language codes used by the application that differ from the codes the
# Google translator expects. In the frontend translation table 'zh' is
# "chinese (simplified)" and 'zt' is "chinese (traditional)", so they must be
# sent as 'zh-cn' and 'zh-tw' respectively (the two were previously swapped).
language_code_convert_dict = {
    'he': 'iw',
    'zh': 'zh-cn',
    'zt': 'zh-tw',
}
|
||||
|
||||
to_lang = alpha3_from_alpha2(to_lang)
|
||||
lang_obj = Language(to_lang)
|
||||
lang_obj = CustomLanguage.from_value(to_lang, "alpha3")
|
||||
if not lang_obj:
|
||||
lang_obj = Language(to_lang)
|
||||
if forced:
|
||||
lang_obj = Language.rebuild(lang_obj, forced=True)
|
||||
if hi:
|
||||
|
@ -447,7 +451,8 @@ def translate_subtitles_file(video_path, source_srt_file, to_lang, forced, hi):
|
|||
|
||||
max_characters = 5000
|
||||
|
||||
dest_srt_file = get_subtitle_path(video_path, language=lang_obj, extension='.srt', forced_tag=forced, hi_tag=hi)
|
||||
dest_srt_file = get_subtitle_path(video_path, language=lang_obj if isinstance(lang_obj, Language) else lang_obj.subzero_language(),
|
||||
extension='.srt', forced_tag=forced, hi_tag=hi)
|
||||
|
||||
subs = pysubs2.load(source_srt_file, encoding='utf-8')
|
||||
lines_list = [x.plaintext for x in subs]
|
||||
|
@ -471,8 +476,8 @@ def translate_subtitles_file(video_path, source_srt_file, to_lang, forced, hi):
|
|||
for block_str in lines_block_list:
|
||||
try:
|
||||
translated_partial_srt_text = GoogleTranslator(source='auto',
|
||||
target=language_code_convert_dict.get(lang_obj.basename,
|
||||
lang_obj.basename)
|
||||
target=language_code_convert_dict.get(lang_obj.alpha2,
|
||||
lang_obj.alpha2)
|
||||
).translate(text=block_str)
|
||||
except:
|
||||
return False
|
||||
|
|
|
@ -13,8 +13,8 @@ export const availableTranslation = {
|
|||
ca: "catalan",
|
||||
ceb: "cebuano",
|
||||
ny: "chichewa",
|
||||
"zh-cn": "chinese (simplified)",
|
||||
"zh-tw": "chinese (traditional)",
|
||||
zh: "chinese (simplified)",
|
||||
zt: "chinese (traditional)",
|
||||
co: "corsican",
|
||||
hr: "croatian",
|
||||
cs: "czech",
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
"""Top-level package for deep_translator."""
|
||||
"""Top-level package for Deep Translator"""
|
||||
|
||||
# TODO: Discussion: Do these need to be in __init__.py? Are they intended to be exportable?
|
||||
from .google_trans import GoogleTranslator
|
||||
from .pons import PonsTranslator
|
||||
from .linguee import LingueeTranslator
|
||||
|
@ -8,18 +9,23 @@ from .yandex import YandexTranslator
|
|||
from .qcri import QCRI
|
||||
from .deepl import DeepL
|
||||
from .detection import single_detection, batch_detection
|
||||
from .microsoft import MicrosoftTranslator
|
||||
from .papago import PapagoTranslator
|
||||
|
||||
|
||||
# TODO: Discussion: These should be declared in setup.cfg, setting them here is redundant
|
||||
__author__ = """Nidhal Baccouri"""
|
||||
__email__ = 'nidhalbacc@gmail.com'
|
||||
__version__ = '1.3.2'
|
||||
__version__ = '1.5.0'
|
||||
|
||||
__all__ = [GoogleTranslator,
|
||||
PonsTranslator,
|
||||
LingueeTranslator,
|
||||
MyMemoryTranslator,
|
||||
YandexTranslator,
|
||||
QCRI,
|
||||
DeepL,
|
||||
single_detection,
|
||||
batch_detection]
|
||||
__all__ = [
|
||||
"GoogleTranslator",
|
||||
"PonsTranslator",
|
||||
"LingueeTranslator",
|
||||
"MyMemoryTranslator",
|
||||
"YandexTranslator",
|
||||
"MicrosoftTranslator",
|
||||
"QCRI",
|
||||
"DeepL",
|
||||
"single_detection",
|
||||
"batch_detection"
|
||||
]
|
||||
|
|
|
@ -1,52 +0,0 @@
|
|||
"""Console script for deep_translator."""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
from .google_trans import GoogleTranslator
|
||||
from .mymemory import MyMemoryTranslator
|
||||
from .pons import PonsTranslator
|
||||
from .linguee import LingueeTranslator
|
||||
|
||||
|
||||
def translate(args):
|
||||
"""
|
||||
function used to provide translations from the parsed terminal arguments
|
||||
@param args: parsed terminal arguments
|
||||
@return: None
|
||||
"""
|
||||
translator = None
|
||||
if args.translator == 'google':
|
||||
translator = GoogleTranslator(source=args.source, target=args.target)
|
||||
elif args.translator == 'pons':
|
||||
translator = PonsTranslator(source=args.source, target=args.target)
|
||||
elif args.translator == 'linguee':
|
||||
translator = LingueeTranslator(source=args.source, target=args.target)
|
||||
elif args.translator == 'mymemory':
|
||||
translator = MyMemoryTranslator(source=args.source, target=args.target)
|
||||
else:
|
||||
print("given translator is not supported. Please use a supported translator from the deep_translator tool")
|
||||
|
||||
res = translator.translate(args.text)
|
||||
print(" | Translation from {} to {} |".format(args.source, args.target))
|
||||
print("Translated text: \n {}".format(res))
|
||||
|
||||
|
||||
def main():
|
||||
"""
|
||||
function responsible for parsing terminal arguments and provide them for further use in the translation process
|
||||
|
||||
"""
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('--translator', '-trans',
|
||||
default='google', type=str, help="name of the translator you want to use")
|
||||
parser.add_argument('--source', '-src', type=str, help="source language to translate from", required=True)
|
||||
parser.add_argument('--target', '-tg', type=str, help="target language to translate to", required=True)
|
||||
parser.add_argument('--text', '-txt', type=str, help="text you want to translate", required=True)
|
||||
|
||||
args = parser.parse_args()
|
||||
translate(args)
|
||||
# sys.exit()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
@ -1,11 +0,0 @@
|
|||
"""
|
||||
configuration object that holds data about the language detection api
|
||||
"""
|
||||
|
||||
config = {
|
||||
"url": 'https://ws.detectlanguage.com/0.2/detect',
|
||||
"headers": {
|
||||
'User-Agent': 'Detect Language API Python Client 1.4.0',
|
||||
'Authorization': 'Bearer {}',
|
||||
}
|
||||
}
|
|
@ -1,4 +1,4 @@
|
|||
|
||||
import requests
|
||||
|
||||
BASE_URLS = {
|
||||
"GOOGLE_TRANSLATE": "https://translate.google.com/m",
|
||||
|
@ -7,7 +7,11 @@ BASE_URLS = {
|
|||
"LINGUEE": "https://www.linguee.com/",
|
||||
"MYMEMORY": "http://api.mymemory.translated.net/get",
|
||||
"QCRI": "https://mt.qcri.org/api/v1/{endpoint}?",
|
||||
"DEEPL": "https://api.deepl.com/{version}/"
|
||||
"DEEPL": "https://api.deepl.com/{version}/",
|
||||
"DEEPL_FREE": "https://api-free.deepl.com/v2/",
|
||||
"MICROSOFT_TRANSLATE": "https://api.cognitive.microsofttranslator.com/translate?api-version=3.0",
|
||||
"PAPAGO": "https://papago.naver.com/",
|
||||
"PAPAGO_API": "https://openapi.naver.com/v1/papago/n2mt"
|
||||
}
|
||||
|
||||
GOOGLE_CODES_TO_LANGUAGES = {
|
||||
|
@ -25,6 +29,7 @@ GOOGLE_CODES_TO_LANGUAGES = {
|
|||
'ca': 'catalan',
|
||||
'ceb': 'cebuano',
|
||||
'ny': 'chichewa',
|
||||
'zh': 'chinese',
|
||||
'zh-cn': 'chinese (simplified)',
|
||||
'zh-tw': 'chinese (traditional)',
|
||||
'co': 'corsican',
|
||||
|
@ -158,7 +163,7 @@ LINGUEE_LANGUAGES_TO_CODES = {
|
|||
"hungarian": "hu",
|
||||
"romanian": "ro",
|
||||
"russian": "ru",
|
||||
#"serbian": "sr",
|
||||
# "serbian": "sr",
|
||||
"dutch": "nl",
|
||||
"slovakian": "sk",
|
||||
"greek": "el",
|
||||
|
@ -169,7 +174,7 @@ LINGUEE_LANGUAGES_TO_CODES = {
|
|||
"finnish": "fi",
|
||||
"chinese": "zh",
|
||||
"french": "fr",
|
||||
#"croatian": "hr",
|
||||
# "croatian": "hr",
|
||||
"czech": "cs",
|
||||
"laotian": "lo",
|
||||
"swedish": "sv",
|
||||
|
@ -181,3 +186,65 @@ LINGUEE_LANGUAGES_TO_CODES = {
|
|||
LINGUEE_CODE_TO_LANGUAGE = {v: k for k, v in LINGUEE_LANGUAGES_TO_CODES.items()}
|
||||
|
||||
# "72e9e2cc7c992db4dcbdd6fb9f91a0d1"
|
||||
|
||||
# obtaining the current list of supported Microsoft languages for translation
microsoft_languages_api_url = "https://api.cognitive.microsofttranslator.com/languages?api-version=3.0&scope=translation"

# This runs when the module is imported, so it must never take the whole
# package down: bound the wait with a timeout and fall back to an empty table
# when the endpoint is unreachable or answers with something unexpected.
try:
    microsoft_languages_response = requests.get(microsoft_languages_api_url, timeout=10)
    translation_dict = microsoft_languages_response.json()['translation']
except (requests.exceptions.RequestException, ValueError, KeyError):
    microsoft_languages_response = None
    translation_dict = {}

# NOTE: despite its name, this maps the lower-cased language name to its code.
MICROSOFT_CODES_TO_LANGUAGES = {translation_dict[k]['name'].lower(): k for k in translation_dict.keys()}
|
||||
|
||||
DEEPL_LANGUAGE_TO_CODE = {
|
||||
"bulgarian": "bg",
|
||||
"czech": "cs",
|
||||
"danish": "da",
|
||||
"german": "de",
|
||||
"greek": "el",
|
||||
"english": "en",
|
||||
"spanish": "es",
|
||||
"estonian": "et",
|
||||
"finnish": "fi",
|
||||
"french": "fr",
|
||||
"hungarian": "hu",
|
||||
"italian": "it",
|
||||
"japanese": "ja",
|
||||
"lithuanian": "lt",
|
||||
"latvian": "lv",
|
||||
"dutch": "nl",
|
||||
"polish": "pl",
|
||||
"portuguese": "pt",
|
||||
"romanian": "ro",
|
||||
"russian": "ru",
|
||||
"slovak": "sk",
|
||||
"slovenian": "sl",
|
||||
"swedish": "sv",
|
||||
"chinese": "zh"
|
||||
}
|
||||
|
||||
DEEPL_CODE_TO_LANGUAGE = {v: k for k, v in DEEPL_LANGUAGE_TO_CODE.items()}
|
||||
|
||||
# Papago language codes mapped to the language names they stand for.
PAPAGO_CODE_TO_LANGUAGE = {
    'ko': 'Korean',
    'en': 'English',
    'ja': 'Japanese',
    'zh-CN': 'Chinese',
    'zh-TW': 'Chinese traditional',
    'es': 'Spanish',
    'fr': 'French',
    'vi': 'Vietnamese',
    'th': 'Thai',
    'id': 'Indonesia'
}

# Reverse lookup (language name -> code). The loop variables have to be
# unpacked as (code, name) and emitted as name: code; the previous
# `{v: k for v, k in ...}` rebuilt the very same code -> name mapping.
PAPAGO_LANGUAGE_TO_CODE = {v: k for k, v in PAPAGO_CODE_TO_LANGUAGE.items()}
|
||||
|
||||
QCRI_CODE_TO_LANGUAGE = {
|
||||
'ar': 'Arabic',
|
||||
'en': 'English',
|
||||
'es': 'Spanish'
|
||||
}
|
||||
|
||||
QCRI_LANGUAGE_TO_CODE = {
|
||||
v: k for k, v in QCRI_CODE_TO_LANGUAGE.items()
|
||||
}
|
|
@ -1,59 +1,89 @@
|
|||
|
||||
import requests
|
||||
from requests.utils import requote_uri
|
||||
from deep_translator.constants import BASE_URLS
|
||||
from deep_translator.exceptions import (RequestError,
|
||||
ServerException, TranslationNotFound, TooManyRequests)
|
||||
from .constants import BASE_URLS, DEEPL_LANGUAGE_TO_CODE
|
||||
from .exceptions import (ServerException,
|
||||
TranslationNotFound,
|
||||
LanguageNotSupportedException,
|
||||
AuthorizationException)
|
||||
|
||||
|
||||
class DeepL(object):
|
||||
"""
|
||||
class that wraps functions, which use the DeepL translator under the hood to translate word(s)
|
||||
"""
|
||||
_languages = DEEPL_LANGUAGE_TO_CODE
|
||||
|
||||
def __init__(self, api_key=None):
|
||||
def __init__(self, api_key=None, source="en", target="en", use_free_api=True, **kwargs):
|
||||
"""
|
||||
@param api_key: your DeepL api key. Get one here: https://www.deepl.com/docs-api/accessing-the-api/
|
||||
@param api_key: your DeepL api key.
|
||||
Get one here: https://www.deepl.com/docs-api/accessing-the-api/
|
||||
@param source: source language
|
||||
@param target: target language
|
||||
"""
|
||||
|
||||
if not api_key:
|
||||
raise ServerException(401)
|
||||
self.version = 'v2'
|
||||
self.api_key = api_key
|
||||
self.__base_url = BASE_URLS.get("DEEPL").format(version=self.version)
|
||||
self.source = self._map_language_to_code(source)
|
||||
self.target = self._map_language_to_code(target)
|
||||
if use_free_api:
|
||||
self.__base_url = BASE_URLS.get("DEEPL_FREE").format(version=self.version)
|
||||
else:
|
||||
self.__base_url = BASE_URLS.get("DEEPL").format(version=self.version)
|
||||
|
||||
def translate(self, source, target, text):
|
||||
def translate(self, text, **kwargs):
|
||||
"""
|
||||
@param text: text to translate
|
||||
@return: translated text
|
||||
"""
|
||||
# Create the request parameters.
|
||||
translate_endpoint = 'translate'
|
||||
params = {
|
||||
"auth_key": self.api_key,
|
||||
"target_lang": target,
|
||||
"source_lang": source,
|
||||
"source_lang": self.source,
|
||||
"target_lang": self.target,
|
||||
"text": text
|
||||
}
|
||||
# Do the request and check the connection.
|
||||
try:
|
||||
response = requests.get(self.__base_url, params=params)
|
||||
response = requests.get(self.__base_url + translate_endpoint, params=params)
|
||||
except ConnectionError:
|
||||
raise ServerException(503)
|
||||
# If the answer is not success, raise server exception.
|
||||
if response.status_code == 403:
|
||||
raise AuthorizationException(self.api_key)
|
||||
elif response.status_code != 200:
|
||||
raise ServerException(response.status_code)
|
||||
# Get the response and check is not empty.
|
||||
res = response.json()
|
||||
if not res:
|
||||
raise TranslationNotFound(text)
|
||||
# Process and return the response.
|
||||
return res['translations'][0]['text']
|
||||
|
||||
else:
|
||||
if response.status_code != 200:
|
||||
ServerException(response.status_code)
|
||||
else:
|
||||
res = response.json()
|
||||
if not res:
|
||||
raise TranslationNotFound(text)
|
||||
return res
|
||||
|
||||
def translate_batch(self, source, target, batch):
|
||||
def translate_batch(self, batch, **kwargs):
|
||||
"""
|
||||
translate a batch of texts
|
||||
@param source: source language
|
||||
@param target: target language
|
||||
@param batch: list of texts to translate
|
||||
@return: list of translations
|
||||
"""
|
||||
return [self.translate(source, target, text) for text in batch]
|
||||
return [self.translate(text, **kwargs) for text in batch]
|
||||
|
||||
@staticmethod
|
||||
def get_supported_languages(as_dict=False, **kwargs):
|
||||
return [*DeepL._languages.keys()] if not as_dict else DeepL._languages
|
||||
|
||||
def _is_language_supported(self, lang, **kwargs):
|
||||
# The language is supported when is in the dicionary.
|
||||
return lang == 'auto' or lang in self._languages.keys() or lang in self._languages.values()
|
||||
|
||||
def _map_language_to_code(self, lang, **kwargs):
|
||||
if lang in self._languages.keys():
|
||||
return self._languages[lang]
|
||||
elif lang in self._languages.values():
|
||||
return lang
|
||||
raise LanguageNotSupportedException(lang)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
d = DeepL(api_key="key")
|
||||
print(d)
|
||||
d = DeepL(target="de")
|
||||
t = d.translate("I have no idea")
|
||||
print("text: ", t)
|
||||
|
|
|
@ -2,11 +2,12 @@
|
|||
language detection API
|
||||
"""
|
||||
import requests
|
||||
from deep_translator.configs import config
|
||||
from requests.exceptions import HTTPError
|
||||
|
||||
# Module global config
|
||||
config = {"url": 'https://ws.detectlanguage.com/0.2/detect',"headers": {'User-Agent': 'Detect Language API Python Client 1.4.0','Authorization': 'Bearer {}',}}
|
||||
|
||||
def get_request_body(text, api_key, *args):
|
||||
def get_request_body(text, api_key, *args, **kwargs):
|
||||
"""
|
||||
send a request and return the response body parsed as dictionary
|
||||
|
||||
|
@ -58,7 +59,7 @@ def single_detection(text, api_key=None, detailed=False, *args, **kwargs):
|
|||
return lang
|
||||
|
||||
|
||||
def batch_detection(text_list, api_key, detailed=False, *args):
|
||||
def batch_detection(text_list, api_key, detailed=False, *args, **kwargs):
|
||||
"""
|
||||
function responsible for detecting the language from a text
|
||||
|
||||
|
|
|
@ -36,6 +36,18 @@ class NotValidPayload(BaseError):
|
|||
super(NotValidPayload, self).__init__(val, message)
|
||||
|
||||
|
||||
class InvalidSourceOrTargetLanguage(BaseError):
|
||||
"""
|
||||
exception thrown if the source and target languages are the same
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
val,
|
||||
message="source and target language can't be the same"):
|
||||
super(InvalidSourceOrTargetLanguage, self).__init__(val, message)
|
||||
|
||||
|
||||
|
||||
class TranslationNotFound(BaseError):
|
||||
"""
|
||||
exception thrown if no translation was found for the text provided by the user
|
||||
|
@ -70,7 +82,7 @@ class NotValidLength(BaseError):
|
|||
|
||||
class RequestError(Exception):
|
||||
"""
|
||||
exception thrown if an error occured during the request call, e.g a connection problem.
|
||||
exception thrown if an error occurred during the request call, e.g a connection problem.
|
||||
"""
|
||||
|
||||
def __init__(self, message="Request exception can happen due to an api connection error. "
|
||||
|
@ -81,9 +93,22 @@ class RequestError(Exception):
|
|||
return self.message
|
||||
|
||||
|
||||
class MicrosoftAPIerror(Exception):
    """
    raised when the Microsoft API answers with one of its own errors
    """

    def __init__(self, api_message):
        # fixed prefix first, then a printable copy of whatever the API returned
        self.message = "Microsoft API returned the following error"
        self.api_message = str(api_message)

    def __str__(self):
        return f"{self.message}: {self.api_message}"
|
||||
|
||||
|
||||
class TooManyRequests(Exception):
|
||||
"""
|
||||
exception thrown if an error occured during the request call, e.g a connection problem.
|
||||
exception thrown if an error occurred during the request call, e.g a connection problem.
|
||||
"""
|
||||
|
||||
def __init__(self, message="Server Error: You made too many requests to the server. According to google, you are allowed to make 5 requests per second and up to 200k requests per day. You can wait and try again later or you can try the translate_batch function"):
|
||||
|
@ -111,3 +136,9 @@ class ServerException(Exception):
|
|||
def __init__(self, status_code, *args):
|
||||
message = self.errors.get(status_code, "API server error")
|
||||
super(ServerException, self).__init__(message, *args)
|
||||
|
||||
|
||||
class AuthorizationException(Exception):
    """
    exception thrown when access is refused for the given api key
    """

    def __init__(self, api_key, *args):
        # format() rather than "+": a non-string key (e.g. None) must still
        # produce a readable message instead of a TypeError raised while the
        # exception itself is being built
        msg = 'Unauthorized access with the api key {}'.format(api_key)
        super().__init__(msg, *args)
|
||||
|
|
|
@ -2,9 +2,9 @@
|
|||
google translator API
|
||||
"""
|
||||
|
||||
from deep_translator.constants import BASE_URLS, GOOGLE_LANGUAGES_TO_CODES
|
||||
from deep_translator.exceptions import TooManyRequests, LanguageNotSupportedException, TranslationNotFound, NotValidPayload, RequestError
|
||||
from deep_translator.parent import BaseTranslator
|
||||
from .constants import BASE_URLS, GOOGLE_LANGUAGES_TO_CODES
|
||||
from .exceptions import TooManyRequests, LanguageNotSupportedException, TranslationNotFound, NotValidPayload, RequestError
|
||||
from .parent import BaseTranslator
|
||||
from bs4 import BeautifulSoup
|
||||
import requests
|
||||
from time import sleep
|
||||
|
@ -19,12 +19,13 @@ class GoogleTranslator(BaseTranslator):
|
|||
_languages = GOOGLE_LANGUAGES_TO_CODES
|
||||
supported_languages = list(_languages.keys())
|
||||
|
||||
def __init__(self, source="auto", target="en"):
|
||||
def __init__(self, source="auto", target="en", proxies=None, **kwargs):
|
||||
"""
|
||||
@param source: source language to translate from
|
||||
@param target: target language to translate to
|
||||
"""
|
||||
self.__base_url = BASE_URLS.get("GOOGLE_TRANSLATE")
|
||||
self.proxies = proxies
|
||||
|
||||
if self.is_language_supported(source, target):
|
||||
self._source, self._target = self._map_language_to_code(source.lower(), target.lower())
|
||||
|
@ -36,12 +37,13 @@ class GoogleTranslator(BaseTranslator):
|
|||
element_query={"class": "t0"},
|
||||
payload_key='q', # key of text in the url
|
||||
tl=self._target,
|
||||
sl=self._source)
|
||||
sl=self._source,
|
||||
**kwargs)
|
||||
|
||||
self._alt_element_query = {"class": "result-container"}
|
||||
|
||||
@staticmethod
|
||||
def get_supported_languages(as_dict=False):
|
||||
def get_supported_languages(as_dict=False, **kwargs):
|
||||
"""
|
||||
return the supported languages by the google translator
|
||||
@param as_dict: if True, the languages will be returned as a dictionary mapping languages to their abbreviations
|
||||
|
@ -89,24 +91,34 @@ class GoogleTranslator(BaseTranslator):
|
|||
self._url_params[self.payload_key] = text
|
||||
|
||||
response = requests.get(self.__base_url,
|
||||
params=self._url_params, headers ={'User-agent': 'your bot 0.1'})
|
||||
|
||||
params=self._url_params,
|
||||
proxies=self.proxies)
|
||||
if response.status_code == 429:
|
||||
raise TooManyRequests()
|
||||
|
||||
if response.status_code != 200:
|
||||
# print("status code", response.status_code)
|
||||
raise RequestError()
|
||||
|
||||
soup = BeautifulSoup(response.text, 'html.parser')
|
||||
|
||||
element = soup.find(self._element_tag, self._element_query)
|
||||
|
||||
if not element:
|
||||
element = soup.find(self._element_tag, self._alt_element_query)
|
||||
if not element:
|
||||
raise TranslationNotFound(text)
|
||||
if element.get_text(strip=True) == text.strip():
|
||||
to_translate_alpha = ''.join(ch for ch in text.strip() if ch.isalnum())
|
||||
translated_alpha = ''.join(ch for ch in element.get_text(strip=True) if ch.isalnum())
|
||||
if to_translate_alpha and translated_alpha and to_translate_alpha == translated_alpha:
|
||||
self._url_params["tl"] = self._target
|
||||
if "hl" not in self._url_params:
|
||||
return text.strip()
|
||||
del self._url_params["hl"]
|
||||
return self.translate(text)
|
||||
|
||||
return element.get_text(strip=True)
|
||||
else:
|
||||
return element.get_text(strip=True)
|
||||
|
||||
def translate_file(self, path, **kwargs):
|
||||
"""
|
||||
|
@ -118,9 +130,8 @@ class GoogleTranslator(BaseTranslator):
|
|||
"""
|
||||
try:
|
||||
with open(path) as f:
|
||||
text = f.read()
|
||||
|
||||
return self.translate(text=text)
|
||||
text = f.read().strip()
|
||||
return self.translate(text)
|
||||
except Exception as e:
|
||||
raise e
|
||||
|
||||
|
@ -149,7 +160,7 @@ class GoogleTranslator(BaseTranslator):
|
|||
except Exception as e:
|
||||
raise e
|
||||
|
||||
def translate_batch(self, batch=None):
|
||||
def translate_batch(self, batch=None, **kwargs):
|
||||
"""
|
||||
translate a list of texts
|
||||
@param batch: list of texts you want to translate
|
||||
|
@ -158,16 +169,21 @@ class GoogleTranslator(BaseTranslator):
|
|||
if not batch:
|
||||
raise Exception("Enter your text list that you want to translate")
|
||||
|
||||
print("Please wait.. This may take a couple of seconds because deep_translator sleeps "
|
||||
"for two seconds after each request in order to not spam the google server.")
|
||||
arr = []
|
||||
for text in batch:
|
||||
translated = self.translate(text)
|
||||
for i, text in enumerate(batch):
|
||||
|
||||
translated = self.translate(text, **kwargs)
|
||||
arr.append(translated)
|
||||
print("sentence number ", i+1, " has been translated successfully")
|
||||
sleep(2)
|
||||
|
||||
return arr
|
||||
|
||||
|
||||
# if __name__ == '__main__':
|
||||
# for _ in range(10):
|
||||
# txt = GoogleTranslator(source="en", target="ar").translate("Hello how are you")
|
||||
# print("text: ", txt)
|
||||
|
||||
if __name__ == '__main__':
|
||||
translator = GoogleTranslator(source='ru', target='uk')
|
||||
t = translator.translate("Я разработчик") # => "I am a developer"
|
||||
print(t)
|
||||
|
|
|
@ -2,14 +2,14 @@
|
|||
linguee translator API
|
||||
"""
|
||||
|
||||
from deep_translator.constants import BASE_URLS, LINGUEE_LANGUAGES_TO_CODES, LINGUEE_CODE_TO_LANGUAGE
|
||||
from deep_translator.exceptions import (LanguageNotSupportedException,
|
||||
TranslationNotFound,
|
||||
NotValidPayload,
|
||||
ElementNotFoundInGetRequest,
|
||||
RequestError,
|
||||
TooManyRequests)
|
||||
from deep_translator.parent import BaseTranslator
|
||||
from .constants import BASE_URLS, LINGUEE_LANGUAGES_TO_CODES, LINGUEE_CODE_TO_LANGUAGE
|
||||
from .exceptions import (LanguageNotSupportedException,
|
||||
TranslationNotFound,
|
||||
NotValidPayload,
|
||||
ElementNotFoundInGetRequest,
|
||||
RequestError,
|
||||
TooManyRequests)
|
||||
from .parent import BaseTranslator
|
||||
from bs4 import BeautifulSoup
|
||||
import requests
|
||||
from requests.utils import requote_uri
|
||||
|
@ -22,12 +22,13 @@ class LingueeTranslator(BaseTranslator):
|
|||
_languages = LINGUEE_LANGUAGES_TO_CODES
|
||||
supported_languages = list(_languages.keys())
|
||||
|
||||
def __init__(self, source, target="en"):
|
||||
def __init__(self, source, target="en", proxies=None, **kwargs):
|
||||
"""
|
||||
@param source: source language to translate from
|
||||
@param target: target language to translate to
|
||||
"""
|
||||
self.__base_url = BASE_URLS.get("LINGUEE")
|
||||
self.proxies = proxies
|
||||
|
||||
if self.is_language_supported(source, target):
|
||||
self._source, self._target = self._map_language_to_code(source.lower(), target.lower())
|
||||
|
@ -41,7 +42,7 @@ class LingueeTranslator(BaseTranslator):
|
|||
)
|
||||
|
||||
@staticmethod
|
||||
def get_supported_languages(as_dict=False):
|
||||
def get_supported_languages(as_dict=False, **kwargs):
|
||||
"""
|
||||
return the supported languages by the linguee translator
|
||||
@param as_dict: if True, the languages will be returned as a dictionary mapping languages to their abbreviations
|
||||
|
@ -88,7 +89,7 @@ class LingueeTranslator(BaseTranslator):
|
|||
# %s-%s/translation/%s.html
|
||||
url = "{}{}-{}/translation/{}.html".format(self.__base_url, self._source, self._target, word)
|
||||
url = requote_uri(url)
|
||||
response = requests.get(url)
|
||||
response = requests.get(url, proxies=self.proxies)
|
||||
|
||||
if response.status_code == 429:
|
||||
raise TooManyRequests()
|
||||
|
@ -125,6 +126,6 @@ class LingueeTranslator(BaseTranslator):
|
|||
|
||||
translated_words = []
|
||||
for word in words:
|
||||
translated_words.append(self.translate(payload=word))
|
||||
translated_words.append(self.translate(word=word, **kwargs))
|
||||
return translated_words
|
||||
|
||||
|
|
124
libs/deep_translator/main.py
Normal file
124
libs/deep_translator/main.py
Normal file
|
@ -0,0 +1,124 @@
|
|||
"""Console script for deep_translator."""
|
||||
|
||||
import click
|
||||
from .google_trans import GoogleTranslator
|
||||
from .mymemory import MyMemoryTranslator
|
||||
from .deepl import DeepL
|
||||
from .qcri import QCRI
|
||||
from .linguee import LingueeTranslator
|
||||
from .pons import PonsTranslator
|
||||
from .yandex import YandexTranslator
|
||||
from .microsoft import MicrosoftTranslator
|
||||
from .papago import PapagoTranslator
|
||||
|
||||
# Accept the short -h flag next to --help on every command that opts in.
CONTEXT_SETTINGS = {"help_option_names": ["-h", "--help"]}


@click.group()
def cli():
    # Root command group; the sub-commands below register themselves on it.
    # Deliberately no docstring: click would surface it as help text.
    return None
|
||||
|
||||
@cli.command(context_settings=CONTEXT_SETTINGS, no_args_is_help=True)
@click.argument('translator', required=True, default='google', type=str)
@click.option("--source", "-src", required=True, type=str, help="source language to translate from")
@click.option("--target", "-tgt", required=True, type=str, help="target language to translate to")
@click.option("--text", "-txt", type=str,required = True,prompt="Enter the text you want to translate",help="text you want to translate")
@click.option("--api-key",type=str,help="required for DeepL, QCRI, Yandex, Microsoft and Papago translators")
def translate(translator, source, target, text, api_key):
    """
    Use TRANSLATOR to translate source material into another language.
    \f
    Directory function to send arguments to the correct translator.
    @param translator: translator name parsed from terminal arguments
    @param source: source language to translate from
    @param target: target language to translate to
    @param text: text to translate
    @param api_key: api key, needed by the translators listed in api_key_required
    @return: 0 once the translation has been printed
    """
    # accept "Google", "DEEPL", ... the same way the `languages` command does
    translator = translator.lower()

    api_key_required = ["deepl", "qcri", "yandex", "microsoft", "papago"]
    if translator in api_key_required and not api_key:
        click.echo(
            "This translator requires an api key provided through --api-key")
        # Stop here with a failure status: going on would only hand a missing
        # key to the translator constructor.
        raise SystemExit(1)

    # translators that only need the language pair
    without_key = {
        "google": GoogleTranslator,
        "mymemory": MyMemoryTranslator,
        "linguee": LingueeTranslator,
        "pons": PonsTranslator,
    }
    # translators that additionally take the api key
    with_key = {
        "deepl": DeepL,
        "qcri": QCRI,
        "yandex": YandexTranslator,
        "microsoft": MicrosoftTranslator,
        "papago": PapagoTranslator,
    }

    if translator in without_key:
        engine = without_key[translator](source=source, target=target)
    elif translator in with_key:
        engine = with_key[translator](source=source, target=target, api_key=api_key)
    else:
        raise AttributeError("The given translator is not supported.")

    res = engine.translate(text)
    click.echo(f" | Translation from {source} to {target} |")
    click.echo(f"Translated text: \n {res}")
    return 0
|
||||
|
||||
@cli.command(context_settings=CONTEXT_SETTINGS, no_args_is_help=True)
@click.argument('translator')
@click.argument('api_key', required=False)
def languages(translator, api_key):
    """
    Retrieve the list of available languages from the given translator.
    @param translator: Translator given by the user.
    @param api_key: Optional API key given by the user. Required for some translators.
    @return: None
    """
    # NOTE: the docstring above is what click prints as help text, so it is
    # left untouched; implementation notes live in comments instead.
    name = translator.lower()

    # Translators queried through the class itself, without building an
    # instance. DeepL and Microsoft expose get_supported_languages as a
    # staticmethod, and constructing MicrosoftTranslator without a target
    # raises, so they belong here as well.
    queried_on_class = {
        "google": GoogleTranslator,
        "mymemory": MyMemoryTranslator,
        "deepl": DeepL,
        "linguee": LingueeTranslator,
        "pons": PonsTranslator,
        "microsoft": MicrosoftTranslator,
    }
    # Translators that are instantiated with the api key before being queried.
    queried_on_instance = {
        "qcri": QCRI,
        "yandex": YandexTranslator,
        "papago": PapagoTranslator,
    }

    if name in queried_on_class:
        provider = queried_on_class[name]
    elif name in queried_on_instance:
        if not api_key:
            click.echo("This translator requires an api key provided through --api-key")
            # stop with a failure status instead of passing a missing key on
            raise SystemExit(1)
        provider = queried_on_instance[name](api_key=api_key)
    else:
        raise AttributeError("The given translator is not supported.")

    supported_languages = provider.get_supported_languages(as_dict=True)
    # print the translator's name, not the repr of the class/instance
    click.echo(f"Languages supported by '{name}' are :")
    for k, v in supported_languages.items():
        click.echo(f"|- {k}: {v}")
    return 0
|
||||
|
||||
@cli.command()
def list():
    """Lists available translators."""
    # The docstring doubles as the command's help text and is kept verbatim.
    overview = "Available translators include: Google, MyMemory, QCRI, Linguee, Pons, Yandex, Microsoft (Bing), and Papago."
    click.echo(overview)
    return 0
|
||||
|
||||
if __name__ == "__main__":
|
||||
cli()
|
146
libs/deep_translator/microsoft.py
Normal file
146
libs/deep_translator/microsoft.py
Normal file
|
@ -0,0 +1,146 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
import requests
|
||||
import logging
|
||||
import sys
|
||||
|
||||
from .constants import BASE_URLS, MICROSOFT_CODES_TO_LANGUAGES
|
||||
from .exceptions import LanguageNotSupportedException, ServerException, MicrosoftAPIerror
|
||||
|
||||
|
||||
class MicrosoftTranslator:
    """
    the class that wraps functions, which use the Microsoft translator under the hood to translate word(s)

    `_languages` is used as a mapping whose keys are accepted language names and
    whose values are the codes sent to the API (see `_map_language_to_code`).
    """

    _languages = MICROSOFT_CODES_TO_LANGUAGES
    supported_languages = list(_languages.values())

    def __init__(self, api_key=None, region=None, source=None, target=None, proxies=None, **kwargs):
        """
        @params api_key and target are the required params
        @param api_key: your Microsoft API key
        @param region: your Microsoft Location
        @param source: optional source language (name or code)
        @param target: target language (name or code), or a list of them
        @param proxies: proxies mapping forwarded to requests
        @param kwargs: extra query parameters sent with every translate request
        @raise ServerException: if api_key or target is missing
        @raise LanguageNotSupportedException: if source or target is not supported
        """
        if not api_key:
            raise ServerException(401)
        self.api_key = api_key

        self.proxies = proxies
        self.headers = {
            "Ocp-Apim-Subscription-Key": self.api_key,
            "Content-type": "application/json",
        }
        # region is not required but very common and goes to headers if passed
        self.region = region
        if region:
            self.headers["Ocp-Apim-Subscription-Region"] = self.region

        if not target:
            raise ServerException(401)

        single_target = isinstance(target, str)
        requested_target = target.lower() if single_target else [i.lower() for i in target]
        # raises LanguageNotSupportedException for unsupported languages
        self.is_language_supported(requested_target)
        # _map_language_to_code is a generator: materialize it, otherwise the
        # first request would exhaust it and later requests would silently
        # lose their 'to' parameter.
        target_codes = list(self._map_language_to_code(requested_target))
        self.target = target_codes[0] if single_target else target_codes

        self.url_params = {'to': self.target, **kwargs}

        self.source = None
        if source:
            requested_source = source.lower()
            self.is_language_supported(requested_source)
            # same generator pitfall as for the target: keep the plain code
            self.source = next(self._map_language_to_code(requested_source))
            self.url_params['from'] = self.source

        self.__base_url = BASE_URLS.get("MICROSOFT_TRANSLATE")

    @staticmethod
    def get_supported_languages(as_dict=False, **kwargs):
        """
        return the languages supported by the microsoft translator
        @param as_dict: if True, the languages will be returned as a dictionary mapping languages to their abbreviations
        @return: list or dict
        """
        return MicrosoftTranslator.supported_languages if not as_dict else MicrosoftTranslator._languages

    def _map_language_to_code(self, language, **kwargs):
        """
        map the language to its corresponding code (abbreviation) if the language was passed by its full name by the user
        @param language: a string (if 1 lang) or a list (if multiple langs)
        @return: generator yielding the mapped value of each language
        @raise LanguageNotSupportedException: if a language is not supported
        """
        if isinstance(language, str):
            language = [language]
        for lang in language:
            if lang in self._languages.values():
                # already a code
                yield lang
            elif lang in self._languages.keys():
                yield self._languages[lang]
            else:
                raise LanguageNotSupportedException(lang)

    def is_language_supported(self, language, **kwargs):
        """
        check if the language is supported by the translator
        @param language: a string (if 1 lang) or a list (if multiple langs)
        @return: True when every language is supported
        @raise LanguageNotSupportedException: for the first unsupported language
        """
        if isinstance(language, str):
            language = [language]
        for lang in language:
            if lang not in self._languages.keys() and lang not in self._languages.values():
                raise LanguageNotSupportedException(lang)
        return True

    def translate(self, text, **kwargs):
        """
        function that uses microsoft translate to translate a text
        @param text: desired text to translate
        @return: str: translated text (one line per target language)
        @raise requests.exceptions.RequestException: if the HTTP request fails
        @raise MicrosoftAPIerror: if the API answers with an error object
        """
        # a body must be a list of dicts to process multiple texts;
        # I have not added multiple text processing here since it is covered by the translate_batch method
        valid_microsoft_json = [{'text': text}]
        try:
            requested = requests.post(self.__base_url,
                                      params=self.url_params,
                                      headers=self.headers,
                                      json=valid_microsoft_json,
                                      proxies=self.proxies)
        except requests.exceptions.RequestException as exc:
            logging.warning(f"Returned error: {type(exc).__name__}")
            # Re-raise: there is no response to parse, and continuing would
            # only fail later with an unrelated UnboundLocalError.
            raise

        response_body = requested.json()

        # Where Microsoft API responds with an api error, it returns a dict in response.json()
        if isinstance(response_body, dict):
            error_message = response_body['error']
            raise MicrosoftAPIerror(error_message)
        # Where it responds with a translation, its response.json() is a list e.g. [{'translations': [{'text': 'Hello world!', 'to': 'en'}]}]
        if isinstance(response_body, list):
            all_translations = [i['text'] for i in response_body[0]['translations']]
            return "\n".join(all_translations)
        return None

    def translate_file(self, path, **kwargs):
        """
        translate from a file
        @param path: path to file
        @return: translated text
        """
        with open(path) as f:
            text = f.read().strip()
        return self.translate(text)

    def translate_batch(self, batch, **kwargs):
        """
        translate a batch of texts
        @param batch: list of texts to translate
        @return: list of translations
        """
        return [self.translate(text, **kwargs) for text in batch]
|
|
@ -4,13 +4,13 @@ mymemory translator API
|
|||
import logging
|
||||
import warnings
|
||||
|
||||
from deep_translator.constants import BASE_URLS, GOOGLE_LANGUAGES_TO_CODES
|
||||
from deep_translator.exceptions import (NotValidPayload,
|
||||
TranslationNotFound,
|
||||
LanguageNotSupportedException,
|
||||
RequestError,
|
||||
TooManyRequests)
|
||||
from deep_translator.parent import BaseTranslator
|
||||
from .constants import BASE_URLS, GOOGLE_LANGUAGES_TO_CODES
|
||||
from .exceptions import (NotValidPayload,
|
||||
TranslationNotFound,
|
||||
LanguageNotSupportedException,
|
||||
RequestError,
|
||||
TooManyRequests)
|
||||
from .parent import BaseTranslator
|
||||
import requests
|
||||
from time import sleep
|
||||
|
||||
|
@ -22,12 +22,13 @@ class MyMemoryTranslator(BaseTranslator):
|
|||
_languages = GOOGLE_LANGUAGES_TO_CODES
|
||||
supported_languages = list(_languages.keys())
|
||||
|
||||
def __init__(self, source="auto", target="en", **kwargs):
|
||||
def __init__(self, source="auto", target="en", proxies=None, **kwargs):
|
||||
"""
|
||||
@param source: source language to translate from
|
||||
@param target: target language to translate to
|
||||
"""
|
||||
self.__base_url = BASE_URLS.get("MYMEMORY")
|
||||
self.proxies = proxies
|
||||
if self.is_language_supported(source, target):
|
||||
self._source, self._target = self._map_language_to_code(source.lower(), target.lower())
|
||||
self._source = self._source if self._source != 'auto' else 'Lao'
|
||||
|
@ -40,7 +41,7 @@ class MyMemoryTranslator(BaseTranslator):
|
|||
langpair='{}|{}'.format(self._source, self._target))
|
||||
|
||||
@staticmethod
|
||||
def get_supported_languages(as_dict=False):
|
||||
def get_supported_languages(as_dict=False, **kwargs):
|
||||
"""
|
||||
return the supported languages by the mymemory translator
|
||||
@param as_dict: if True, the languages will be returned as a dictionary mapping languages to their abbreviations
|
||||
|
@ -93,7 +94,8 @@ class MyMemoryTranslator(BaseTranslator):
|
|||
|
||||
response = requests.get(self.__base_url,
|
||||
params=self._url_params,
|
||||
headers=self.headers)
|
||||
headers=self.headers,
|
||||
proxies=self.proxies)
|
||||
|
||||
if response.status_code == 429:
|
||||
raise TooManyRequests()
|
||||
|
@ -150,13 +152,13 @@ class MyMemoryTranslator(BaseTranslator):
|
|||
"""
|
||||
try:
|
||||
with open(path) as f:
|
||||
text = f.read()
|
||||
text = f.read().strip()
|
||||
|
||||
return self.translate(text=text)
|
||||
except Exception as e:
|
||||
raise e
|
||||
|
||||
def translate_batch(self, batch=None):
|
||||
def translate_batch(self, batch=None, **kwargs):
|
||||
"""
|
||||
translate a list of texts
|
||||
@param batch: list of texts you want to translate
|
||||
|
@ -167,7 +169,7 @@ class MyMemoryTranslator(BaseTranslator):
|
|||
|
||||
arr = []
|
||||
for text in batch:
|
||||
translated = self.translate(text)
|
||||
translated = self.translate(text, **kwargs)
|
||||
arr.append(translated)
|
||||
sleep(2)
|
||||
|
||||
|
|
154
libs/deep_translator/papago.py
Normal file
154
libs/deep_translator/papago.py
Normal file
|
@ -0,0 +1,154 @@
|
|||
"""
|
||||
papago translator API
|
||||
"""
|
||||
import json
|
||||
from .constants import BASE_URLS, PAPAGO_LANGUAGE_TO_CODE
|
||||
from .exceptions import LanguageNotSupportedException, TranslationNotFound, NotValidPayload
|
||||
import requests
|
||||
import warnings
|
||||
import logging
|
||||
|
||||
|
||||
class PapagoTranslator(object):
    """
    class that wraps functions, which use the papago translator under the hood to translate text(s)
    """
    _languages = PAPAGO_LANGUAGE_TO_CODE
    supported_languages = list(_languages.keys())

    def __init__(self, client_id=None, secret_key=None, source="auto", target="en", **kwargs):
        """
        @param client_id: your papago client id (required)
        @param secret_key: your papago secret key (required)
        @param source: source language to translate from
        @param target: target language to translate to
        @raise Exception: if client_id or secret_key is missing
        @raise LanguageNotSupportedException: if source or target is not supported
        """
        if not client_id or not secret_key:
            raise Exception("Please pass your client id and secret key! visit the papago website for more infos")

        self.__base_url = BASE_URLS.get("PAPAGO_API")
        self.client_id = client_id
        self.secret_key = secret_key

        # Validate the same lower-cased values that are mapped afterwards, so a
        # mixed-case name is not rejected by validation when it would map fine.
        # Non-string values are passed through untouched and rejected by
        # is_language_supported, as before.
        source, target = (lang.lower() if isinstance(lang, str) else lang
                          for lang in (source, target))
        if self.is_language_supported(source, target):
            self._source, self._target = self._map_language_to_code(source, target)

    @staticmethod
    def get_supported_languages(as_dict=False, **kwargs):
        """
        return the supported languages by the papago translator
        @param as_dict: if True, the languages will be returned as a dictionary mapping languages to their abbreviations
        @return: list or dict
        """
        return PapagoTranslator.supported_languages if not as_dict else PapagoTranslator._languages

    def _map_language_to_code(self, *languages):
        """
        map language to its corresponding code (abbreviation) if the language was passed by its full name by the user
        @param languages: list of languages
        @return: generator yielding the mapped value of each language
        @raise LanguageNotSupportedException: if a language is not supported
        """
        for language in languages:
            if language in self._languages.values() or language == 'auto':
                yield language
            elif language in self._languages.keys():
                yield self._languages[language]
            else:
                raise LanguageNotSupportedException(language)

    def is_language_supported(self, *languages):
        """
        check if the language is supported by the translator
        @param languages: list of languages
        @return: True when every language is supported
        @raise LanguageNotSupportedException: for the first unsupported language
        """
        for lang in languages:
            if lang == 'auto':
                continue
            if lang not in self._languages.keys() and lang not in self._languages.values():
                raise LanguageNotSupportedException(lang)
        return True

    def translate(self, text, **kwargs):
        """
        function that uses papago to translate a text
        @param text: desired text to translate
        @return: str: translated text
        @raise Exception: if the HTTP status code is not 200
        @raise TranslationNotFound: if the response carries no translation result
        """
        payload = {
            "source": self._source,
            "target": self._target,
            "text": text
        }
        headers = {
            'X-Naver-Client-Id': self.client_id,
            'X-Naver-Client-Secret': self.secret_key,
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
        }
        response = requests.post(self.__base_url, headers=headers, data=payload)
        if response.status_code != 200:
            raise Exception(f'Translation error! -> status code: {response.status_code}')

        res_body = json.loads(response.text)
        if "message" not in res_body:
            raise TranslationNotFound(text)

        result = res_body.get("message").get("result", None)
        if not result:
            raise TranslationNotFound(text)
        return result.get("translatedText")

    def translate_file(self, path, **kwargs):
        """
        translate directly from file
        @param path: path to the target file
        @type path: str
        @param kwargs: additional args
        @return: str
        """
        with open(path) as f:
            text = f.read().strip()
        return self.translate(text)

    def translate_sentences(self, sentences=None, **kwargs):
        """
        translate many sentences together. This makes sense if you have sentences with different languages
        and you want to translate all to unified language.

        Deprecated: use translate_batch instead.

        @param sentences: list of sentences to translate
        @return: list of all translated sentences
        @raise NotValidPayload: if sentences is empty or None
        """
        warnings.warn("deprecated. Use the translate_batch function instead", DeprecationWarning, stacklevel=2)
        logging.warning("deprecated. Use the translate_batch function instead")
        if not sentences:
            raise NotValidPayload(sentences)

        return [self.translate(text=sentence) for sentence in sentences]

    def translate_batch(self, batch=None, **kwargs):
        """
        translate a list of texts
        @param batch: list of texts you want to translate
        @return: list of translations
        @raise Exception: if batch is empty or None
        """
        if not batch:
            raise Exception("Enter your text list that you want to translate")
        return [self.translate(text, **kwargs) for text in batch]
|
||||
|
||||
|
|
@ -1,9 +1,8 @@
|
|||
"""parent translator class"""
|
||||
|
||||
from deep_translator.exceptions import NotValidPayload, NotValidLength
|
||||
from .exceptions import NotValidPayload, NotValidLength, InvalidSourceOrTargetLanguage
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
|
||||
import string
|
||||
class BaseTranslator(ABC):
|
||||
"""
|
||||
Abstract class that serve as a parent translator for other different translators
|
||||
|
@ -20,6 +19,9 @@ class BaseTranslator(ABC):
|
|||
@param source: source language to translate from
|
||||
@param target: target language to translate to
|
||||
"""
|
||||
if source == target:
|
||||
raise InvalidSourceOrTargetLanguage(source)
|
||||
|
||||
self.__base_url = base_url
|
||||
self._source = source
|
||||
self._target = target
|
||||
|
@ -40,8 +42,13 @@ class BaseTranslator(ABC):
|
|||
@return: bool
|
||||
"""
|
||||
|
||||
if not payload or not isinstance(payload, str):
|
||||
if not payload or not isinstance(payload, str) or not payload.strip() or payload.isdigit():
|
||||
raise NotValidPayload(payload)
|
||||
|
||||
# check if payload contains only symbols
|
||||
if all(i in string.punctuation for i in payload):
|
||||
raise NotValidPayload(payload)
|
||||
|
||||
if not BaseTranslator.__check_length(payload, min_chars, max_chars):
|
||||
raise NotValidLength(payload, min_chars, max_chars)
|
||||
return True
|
||||
|
@ -55,7 +62,7 @@ class BaseTranslator(ABC):
|
|||
@param max_chars: maximum characters allowed
|
||||
@return: bool
|
||||
"""
|
||||
return True if min_chars < len(payload) < max_chars else False
|
||||
return True if min_chars <= len(payload) < max_chars else False
|
||||
|
||||
@abstractmethod
|
||||
def translate(self, text, **kwargs):
|
||||
|
|
|
@ -3,14 +3,14 @@ pons translator API
|
|||
"""
|
||||
from bs4 import BeautifulSoup
|
||||
import requests
|
||||
from deep_translator.constants import BASE_URLS, PONS_LANGUAGES_TO_CODES, PONS_CODES_TO_LANGUAGES
|
||||
from deep_translator.exceptions import (LanguageNotSupportedException,
|
||||
TranslationNotFound,
|
||||
NotValidPayload,
|
||||
ElementNotFoundInGetRequest,
|
||||
RequestError,
|
||||
TooManyRequests)
|
||||
from deep_translator.parent import BaseTranslator
|
||||
from .constants import BASE_URLS, PONS_LANGUAGES_TO_CODES, PONS_CODES_TO_LANGUAGES
|
||||
from .exceptions import (LanguageNotSupportedException,
|
||||
TranslationNotFound,
|
||||
NotValidPayload,
|
||||
ElementNotFoundInGetRequest,
|
||||
RequestError,
|
||||
TooManyRequests)
|
||||
from .parent import BaseTranslator
|
||||
from requests.utils import requote_uri
|
||||
|
||||
|
||||
|
@ -21,13 +21,13 @@ class PonsTranslator(BaseTranslator):
|
|||
_languages = PONS_LANGUAGES_TO_CODES
|
||||
supported_languages = list(_languages.keys())
|
||||
|
||||
def __init__(self, source, target="english"):
|
||||
def __init__(self, source, target="en", proxies=None, **kwargs):
|
||||
"""
|
||||
@param source: source language to translate from
|
||||
@param target: target language to translate to
|
||||
"""
|
||||
self.__base_url = BASE_URLS.get("PONS")
|
||||
|
||||
self.proxies = proxies
|
||||
if self.is_language_supported(source, target):
|
||||
self._source, self._target = self._map_language_to_code(source, target)
|
||||
|
||||
|
@ -40,7 +40,7 @@ class PonsTranslator(BaseTranslator):
|
|||
)
|
||||
|
||||
@staticmethod
|
||||
def get_supported_languages(as_dict=False):
|
||||
def get_supported_languages(as_dict=False, **kwargs):
|
||||
"""
|
||||
return the supported languages by the pons translator
|
||||
@param as_dict: if True, the languages will be returned as a dictionary mapping languages to their abbreviations
|
||||
|
@ -86,7 +86,7 @@ class PonsTranslator(BaseTranslator):
|
|||
if self._validate_payload(word, max_chars=50):
|
||||
url = "{}{}-{}/{}".format(self.__base_url, self._source, self._target, word)
|
||||
url = requote_uri(url)
|
||||
response = requests.get(url)
|
||||
response = requests.get(url, proxies=self.proxies)
|
||||
|
||||
if response.status_code == 429:
|
||||
raise TooManyRequests()
|
||||
|
@ -131,6 +131,6 @@ class PonsTranslator(BaseTranslator):
|
|||
|
||||
translated_words = []
|
||||
for word in words:
|
||||
translated_words.append(self.translate(payload=word))
|
||||
translated_words.append(self.translate(word=word, **kwargs))
|
||||
return translated_words
|
||||
|
||||
|
|
|
@ -1,17 +1,14 @@
|
|||
|
||||
import requests
|
||||
from requests.utils import requote_uri
|
||||
from deep_translator.constants import BASE_URLS
|
||||
from deep_translator.exceptions import (RequestError,
|
||||
ServerException, TranslationNotFound, TooManyRequests)
|
||||
|
||||
from .constants import BASE_URLS, QCRI_LANGUAGE_TO_CODE
|
||||
from .exceptions import (ServerException, TranslationNotFound)
|
||||
|
||||
class QCRI(object):
|
||||
"""
|
||||
class that wraps functions, which use the QCRI translator under the hood to translate word(s)
|
||||
"""
|
||||
|
||||
def __init__(self, api_key=None):
|
||||
def __init__(self, api_key=None, source="en", target="en", **kwargs):
|
||||
"""
|
||||
@param api_key: your qcri api key. Get one for free here https://mt.qcri.org/api/v1/ref
|
||||
"""
|
||||
|
@ -19,7 +16,8 @@ class QCRI(object):
|
|||
if not api_key:
|
||||
raise ServerException(401)
|
||||
self.__base_url = BASE_URLS.get("QCRI")
|
||||
|
||||
self.source = source
|
||||
self.target = target
|
||||
self.api_key = api_key
|
||||
self.api_endpoints = {
|
||||
"get_languages": "getLanguagePairs",
|
||||
|
@ -40,10 +38,13 @@ class QCRI(object):
|
|||
except Exception as e:
|
||||
raise e
|
||||
|
||||
def get_supported_languages(self):
|
||||
|
||||
pairs = self._get("get_languages")
|
||||
return pairs
|
||||
@staticmethod
|
||||
def get_supported_languages(as_dict=False, **kwargs):
|
||||
# Have no use for this as the format is not what we need
|
||||
# Save this for whenever
|
||||
# pairs = self._get("get_languages")
|
||||
# Using this one instead
|
||||
return [*QCRI_LANGUAGE_TO_CODE.keys()] if not as_dict else QCRI_LANGUAGE_TO_CODE
|
||||
|
||||
@property
|
||||
def languages(self):
|
||||
|
@ -57,10 +58,10 @@ class QCRI(object):
|
|||
def domains(self):
|
||||
return self.get_domains()
|
||||
|
||||
def translate(self, source, target, domain, text):
|
||||
def translate(self, text, domain, **kwargs):
|
||||
params = {
|
||||
"key": self.api_key,
|
||||
"langpair": "{}-{}".format(source, target),
|
||||
"langpair": "{}-{}".format(self.source, self.target),
|
||||
"domain": domain,
|
||||
"text": text
|
||||
}
|
||||
|
@ -74,18 +75,17 @@ class QCRI(object):
|
|||
ServerException(response.status_code)
|
||||
else:
|
||||
res = response.json()
|
||||
translation = res["translatedText"]
|
||||
translation = res.get("translatedText")
|
||||
if not translation:
|
||||
raise TranslationNotFound(text)
|
||||
return translation
|
||||
|
||||
def translate_batch(self, source, target, domain, batch):
|
||||
def translate_batch(self, batch, domain, **kwargs):
|
||||
"""
|
||||
translate a batch of texts
|
||||
@param source: source language
|
||||
@param target: target language
|
||||
@domain: domain
|
||||
@param batch: list of texts to translate
|
||||
@return: list of translations
|
||||
"""
|
||||
return [self.translate(source, target, domain, text) for text in batch]
|
||||
return [self.translate(domain, text, **kwargs) for text in batch]
|
||||
|
||||
|
|
|
@ -1 +0,0 @@
|
|||
"""Unit test package for deep_translator."""
|
|
@ -1,57 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
"""Tests for `deep_translator` package."""
|
||||
|
||||
import pytest
|
||||
from deep_translator import exceptions, GoogleTranslator
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def google_translator():
|
||||
"""Sample pytest fixture.
|
||||
|
||||
See more at: http://doc.pytest.org/en/latest/fixture.html
|
||||
"""
|
||||
return GoogleTranslator(target='en')
|
||||
|
||||
|
||||
def test_content(google_translator):
|
||||
"""Sample pytest test function with the pytest fixture as an argument."""
|
||||
# from bs4 import BeautifulSoup
|
||||
# assert 'GitHub' in BeautifulSoup(response.content).title.string
|
||||
assert google_translator.translate(text='좋은') == "good"
|
||||
|
||||
|
||||
def test_inputs():
|
||||
with pytest.raises(exceptions.LanguageNotSupportedException):
|
||||
GoogleTranslator(source="", target="")
|
||||
|
||||
with pytest.raises(exceptions.LanguageNotSupportedException):
|
||||
GoogleTranslator(source="auto", target="nothing")
|
||||
|
||||
# test abbreviations and languages
|
||||
g1 = GoogleTranslator("en", "fr")
|
||||
g2 = GoogleTranslator("english", "french")
|
||||
assert g1._source == g2._source
|
||||
assert g1._target == g2._target
|
||||
|
||||
|
||||
def test_payload(google_translator):
|
||||
|
||||
with pytest.raises(exceptions.NotValidPayload):
|
||||
google_translator.translate(text="")
|
||||
|
||||
with pytest.raises(exceptions.NotValidPayload):
|
||||
google_translator.translate(text=123)
|
||||
|
||||
with pytest.raises(exceptions.NotValidPayload):
|
||||
google_translator.translate(text={})
|
||||
|
||||
with pytest.raises(exceptions.NotValidPayload):
|
||||
google_translator.translate(text=[])
|
||||
|
||||
with pytest.raises(exceptions.NotValidLength):
|
||||
google_translator.translate("a"*5001)
|
||||
|
||||
#for _ in range(1):
|
||||
#assert google_translator.translate(text='좋은') == "good"
|
|
@ -1,49 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
"""Tests for `deep_translator` package."""
|
||||
|
||||
import pytest
|
||||
from deep_translator import exceptions, LingueeTranslator
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def linguee():
|
||||
return LingueeTranslator(source="english", target='french')
|
||||
|
||||
|
||||
def test_content(linguee):
|
||||
"""Sample pytest test function with the pytest fixture as an argument."""
|
||||
# from bs4 import BeautifulSoup
|
||||
# assert 'GitHub' in BeautifulSoup(response.content).title.string
|
||||
assert linguee.translate(word='good') is not None
|
||||
|
||||
|
||||
def test_inputs():
|
||||
with pytest.raises(exceptions.LanguageNotSupportedException):
|
||||
LingueeTranslator(source="", target="")
|
||||
|
||||
with pytest.raises(exceptions.LanguageNotSupportedException):
|
||||
LingueeTranslator(source="auto", target="nothing")
|
||||
|
||||
l1 = LingueeTranslator("en", "fr")
|
||||
l2 = LingueeTranslator("english", "french")
|
||||
assert l1._source == l2._source
|
||||
assert l1._target == l2._target
|
||||
|
||||
|
||||
def test_payload(linguee):
|
||||
|
||||
with pytest.raises(exceptions.NotValidPayload):
|
||||
linguee.translate("")
|
||||
|
||||
with pytest.raises(exceptions.NotValidPayload):
|
||||
linguee.translate(123)
|
||||
|
||||
with pytest.raises(exceptions.NotValidPayload):
|
||||
linguee.translate({})
|
||||
|
||||
with pytest.raises(exceptions.NotValidPayload):
|
||||
linguee.translate([])
|
||||
|
||||
with pytest.raises(exceptions.NotValidLength):
|
||||
linguee.translate("a"*51)
|
|
@ -1,48 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
"""Tests for `deep_translator` package."""
|
||||
|
||||
import pytest
|
||||
from deep_translator import exceptions, MyMemoryTranslator
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mymemory():
|
||||
return MyMemoryTranslator(source="en", target='fr')
|
||||
|
||||
|
||||
def test_content(mymemory):
|
||||
"""Sample pytest test function with the pytest fixture as an argument."""
|
||||
# from bs4 import BeautifulSoup
|
||||
# assert 'GitHub' in BeautifulSoup(response.content).title.string
|
||||
assert mymemory.translate(text='good') is not None
|
||||
|
||||
|
||||
def test_inputs():
|
||||
with pytest.raises(exceptions.LanguageNotSupportedException):
|
||||
MyMemoryTranslator(source="", target="")
|
||||
|
||||
with pytest.raises(exceptions.LanguageNotSupportedException):
|
||||
MyMemoryTranslator(source="auto", target="nothing")
|
||||
m1 = MyMemoryTranslator("en", "fr")
|
||||
m2 = MyMemoryTranslator("english", "french")
|
||||
assert m1._source == m2._source
|
||||
assert m1._target == m2._target
|
||||
|
||||
|
||||
def test_payload(mymemory):
|
||||
|
||||
with pytest.raises(exceptions.NotValidPayload):
|
||||
mymemory.translate(text="")
|
||||
|
||||
with pytest.raises(exceptions.NotValidPayload):
|
||||
mymemory.translate(text=123)
|
||||
|
||||
with pytest.raises(exceptions.NotValidPayload):
|
||||
mymemory.translate(text={})
|
||||
|
||||
with pytest.raises(exceptions.NotValidPayload):
|
||||
mymemory.translate(text=[])
|
||||
|
||||
with pytest.raises(exceptions.NotValidLength):
|
||||
mymemory.translate(text="a"*501)
|
|
@ -1,48 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
"""Tests for `deep_translator` package."""
|
||||
|
||||
import pytest
|
||||
from deep_translator import exceptions, PonsTranslator
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def pons():
|
||||
return PonsTranslator(source="english", target='french')
|
||||
|
||||
|
||||
def test_content(pons):
|
||||
"""Sample pytest test function with the pytest fixture as an argument."""
|
||||
# from bs4 import BeautifulSoup
|
||||
# assert 'GitHub' in BeautifulSoup(response.content).title.string
|
||||
assert pons.translate(word='good') is not None
|
||||
|
||||
|
||||
def test_inputs():
|
||||
with pytest.raises(exceptions.LanguageNotSupportedException):
|
||||
PonsTranslator(source="", target="")
|
||||
|
||||
with pytest.raises(exceptions.LanguageNotSupportedException):
|
||||
PonsTranslator(source="auto", target="nothing")
|
||||
l1 = PonsTranslator("en", "fr")
|
||||
l2 = PonsTranslator("english", "french")
|
||||
assert l1._source == l2._source
|
||||
assert l1._target == l2._target
|
||||
|
||||
|
||||
def test_payload(pons):
|
||||
|
||||
with pytest.raises(exceptions.NotValidPayload):
|
||||
pons.translate("")
|
||||
|
||||
with pytest.raises(exceptions.NotValidPayload):
|
||||
pons.translate(123)
|
||||
|
||||
with pytest.raises(exceptions.NotValidPayload):
|
||||
pons.translate({})
|
||||
|
||||
with pytest.raises(exceptions.NotValidPayload):
|
||||
pons.translate([])
|
||||
|
||||
with pytest.raises(exceptions.NotValidLength):
|
||||
pons.translate("a" * 51)
|
|
@ -1,3 +0,0 @@
|
|||
"""
|
||||
utilities
|
||||
"""
|
|
@ -2,10 +2,8 @@
|
|||
Yandex translator API
|
||||
"""
|
||||
import requests
|
||||
from requests import exceptions
|
||||
from deep_translator.constants import BASE_URLS
|
||||
from deep_translator.exceptions import (RequestError,
|
||||
ServerException, TranslationNotFound, TooManyRequests)
|
||||
from .constants import BASE_URLS
|
||||
from .exceptions import (RequestError, ServerException, TranslationNotFound, TooManyRequests)
|
||||
|
||||
|
||||
class YandexTranslator(object):
|
||||
|
@ -13,13 +11,15 @@ class YandexTranslator(object):
|
|||
class that wraps functions, which use the yandex translator under the hood to translate word(s)
|
||||
"""
|
||||
|
||||
def __init__(self, api_key=None):
|
||||
def __init__(self, api_key=None, source="en", target="de", **kwargs):
|
||||
"""
|
||||
@param api_key: your yandex api key
|
||||
"""
|
||||
if not api_key:
|
||||
raise ServerException(401)
|
||||
self.__base_url = BASE_URLS.get("YANDEX")
|
||||
self.source = source
|
||||
self.target = target
|
||||
|
||||
self.api_key = api_key
|
||||
self.api_version = "v1.5"
|
||||
|
@ -29,7 +29,14 @@ class YandexTranslator(object):
|
|||
"translate": "translate",
|
||||
}
|
||||
|
||||
def get_supported_languages(self):
|
||||
@staticmethod
|
||||
def get_supported_languages(as_dict=False, **kwargs):
|
||||
""" this method is just for consistency."""
|
||||
return """ this method is just for consistency. You need to create an instance of yandex and access
|
||||
supported languages using the languages property or call _get_supported_languages
|
||||
"""
|
||||
|
||||
def _get_supported_languages(self):
|
||||
return set(x.split("-")[0] for x in self.dirs)
|
||||
|
||||
@property
|
||||
|
@ -79,11 +86,11 @@ class YandexTranslator(object):
|
|||
raise ServerException(501)
|
||||
return language
|
||||
|
||||
def translate(self, source, target, text, proxies=None):
|
||||
def translate(self, text, proxies=None, **kwargs):
|
||||
params = {
|
||||
"text": text,
|
||||
"format": "plain",
|
||||
"lang": target if source == "auto" else "{}-{}".format(source, target),
|
||||
"lang": self.target if self.source == "auto" else "{}-{}".format(self.source, self.target),
|
||||
"key": self.api_key
|
||||
}
|
||||
try:
|
||||
|
@ -105,11 +112,9 @@ class YandexTranslator(object):
|
|||
|
||||
return response['text']
|
||||
|
||||
def translate_file(self, source, target, path):
|
||||
def translate_file(self, path, **kwargs):
|
||||
"""
|
||||
translate from a file
|
||||
@param source: source language
|
||||
@param target: target language
|
||||
@param path: path to file
|
||||
@return: translated text
|
||||
"""
|
||||
|
@ -117,16 +122,14 @@ class YandexTranslator(object):
|
|||
with open(path) as f:
|
||||
text = f.read()
|
||||
|
||||
return self.translate(source, target, text)
|
||||
return self.translate(text)
|
||||
except Exception as e:
|
||||
raise e
|
||||
|
||||
def translate_batch(self, source, target, batch):
|
||||
def translate_batch(self, batch, **kwargs):
|
||||
"""
|
||||
translate a batch of texts
|
||||
@param source: source language
|
||||
@param target: target language
|
||||
@param batch: list of texts to translate
|
||||
@return: list of translations
|
||||
"""
|
||||
return [self.translate(source, target, text) for text in batch]
|
||||
return [self.translate(text, **kwargs) for text in batch]
|
||||
|
|
|
@ -8,6 +8,7 @@ bottle-fdsend=0.1.1
|
|||
bottle=0.12.13
|
||||
chardet=3.0.4
|
||||
cloudscraper=1.2.58
|
||||
deep-translator=1.5.4
|
||||
dogpile.cache=0.6.5
|
||||
engineio=4.0.2dev
|
||||
enzyme=0.4.1
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue