Mirror of https://github.com/morpheus65535/bazarr.git (synced 2025-04-23 22:27:17 -04:00)

Commit a7b40eaf79 - "WIP"
Parent: e7cb2a71e2
17 changed files with 75 additions and 99 deletions
@@ -202,7 +202,7 @@ def download_subtitle(path, language, hi, forced, providers, providers_auth, sce
                                              directory=fld,
                                              chmod=chmod,
                                              # formats=("srt", "vtt")
-                                             path_decoder=force_unicode
+                                             path_decoder=None
                                              )
         except Exception as e:
             logging.exception('BAZARR Error saving subtitles file to disk for this file:' + path)
@@ -419,7 +419,6 @@ def manual_download_subtitle(path, language, hi, forced, subtitle, provider, pro
         if not subtitle.is_valid():
             logging.exception('BAZARR No valid subtitles file found for this file: ' + path)
             return
         logging.debug('BAZARR Subtitles file downloaded for this file:' + path)
         try:
             score = round(subtitle.score / max_score * 100, 2)
             fld = get_target_folder(path)
@@ -17,7 +17,7 @@ class Color(_Color):
         return _Color.__new__(cls, r, g, b, a)
 
 #: Version of the pysubs2 library.
-VERSION = "0.2.3"
+VERSION = "0.2.4"
 
 
 PY3 = sys.version_info.major == 3
@@ -4,6 +4,7 @@ from .subrip import SubripFormat
 from .jsonformat import JSONFormat
 from .substation import SubstationFormat
 from .mpl2 import MPL2Format
+from .tmp import TmpFormat
 from .exceptions import *
 
 #: Dict mapping file extensions to format identifiers.

@@ -13,6 +14,7 @@ FILE_EXTENSION_TO_FORMAT_IDENTIFIER = {
     ".ssa": "ssa",
     ".sub": "microdvd",
     ".json": "json",
+    ".txt": "tmp",
 }
 
 #: Dict mapping format identifiers to implementations (FormatBase subclasses).

@@ -23,6 +25,7 @@ FORMAT_IDENTIFIER_TO_FORMAT_CLASS = {
     "microdvd": MicroDVDFormat,
     "json": JSONFormat,
     "mpl2": MPL2Format,
+    "tmp": TmpFormat,
 }
 
 def get_format_class(format_):
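Note: the two dicts above are pysubs2's whole format registry; a file extension resolves to a format identifier, and the identifier resolves to a FormatBase subclass. A minimal sketch of the lookup chain the new ".txt"/"tmp" entries plug into (assuming the vanilla pysubs2 0.2.4 layout):

    from pysubs2.formats import FILE_EXTENSION_TO_FORMAT_IDENTIFIER, get_format_class

    identifier = FILE_EXTENSION_TO_FORMAT_IDENTIFIER[".txt"]  # "tmp" after this change
    format_class = get_format_class(identifier)               # TmpFormat
    print(identifier, format_class.__name__)                  # tmp TmpFormat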
@@ -66,7 +66,14 @@ class SSAFile(MutableSequence):
                 be detected from the file, in which case you don't need
                 to specify it here (when given, this argument overrides
                 autodetection).
             kwargs: Extra options for the parser.
+                keep_unknown_html_tags (bool): This affects SubRip only (SRT),
+                    for other formats this argument is ignored.
+                    By default, HTML tags are converted to equivalent SubStation tags
+                    (eg. ``<i>`` to ``{\\i1}`` and any remaining tags are removed
+                    to keep the text clean. Set this parameter to ``True``
+                    if you want to pass through these tags (eg. ``<sub>``).
+                    This is useful if your output format is SRT and your player
+                    supports these tags.
 
         Returns:
             SSAFile

@@ -86,6 +93,7 @@ class SSAFile(MutableSequence):
         Example:
             >>> subs1 = pysubs2.load("subrip-subtitles.srt")
             >>> subs2 = pysubs2.load("microdvd-subtitles.sub", fps=23.976)
+            >>> subs3 = pysubs2.load("subrip-subtitles-with-fancy-tags.srt", keep_unknown_html_tags=True)
 
         """
         with open(path, encoding=encoding) as fp:
@@ -56,7 +56,7 @@ class SSAStyle(object):
         self.encoding = 1 #: Charset
 
         for k, v in fields.items():
-            if k in self.FIELDS and v is not None:
+            if k in self.FIELDS:
                 setattr(self, k, v)
             else:
                 raise ValueError("SSAStyle has no field named %r" % k)
@@ -31,7 +31,7 @@ class SubripFormat(FormatBase):
         return "srt"
 
     @classmethod
-    def from_file(cls, subs, fp, format_, **kwargs):
+    def from_file(cls, subs, fp, format_, keep_unknown_html_tags=False, **kwargs):
         timestamps = [] # (start, end)
         following_lines = [] # contains lists of lines following each timestamp
@@ -56,15 +56,15 @@ class SubripFormat(FormatBase):
             # Handle the general case.
             s = "".join(lines).strip()
             s = re.sub(r"\n+ *\d+ *$", "", s) # strip number of next subtitle
-            s = re.sub(r"< *i *>", r"{\i1}", s)
-            s = re.sub(r"< */ *i *>", r"{\i0}", s)
-            s = re.sub(r"< *s *>", r"{\s1}", s)
-            s = re.sub(r"< */ *s *>", r"{\s0}", s)
-            s = re.sub(r"< *u *>", "{\\u1}", s) # not r" for Python 2.7 compat, triggers unicodeescape
-            s = re.sub(r"< */ *u *>", "{\\u0}", s)
-            s = re.sub(r"< */? *[a-zA-Z][^>]*>", "", s) # strip other HTML tags
             s = re.sub(r"\r", "", s) # convert newlines
-            s = re.sub(r"\n", r"\N", s) # convert newlines
+            s = re.sub(r"< *i *>", r"{\\i1}", s)
+            s = re.sub(r"< */ *i *>", r"{\\i0}", s)
+            s = re.sub(r"< *s *>", r"{\\s1}", s)
+            s = re.sub(r"< */ *s *>", r"{\\s0}", s)
+            s = re.sub(r"< *u *>", "{\\\\u1}", s) # not r" for Python 2.7 compat, triggers unicodeescape
+            s = re.sub(r"< */ *u *>", "{\\\\u0}", s)
+            if not keep_unknown_html_tags:
+                s = re.sub(r"< */? *[a-zA-Z][^>]*>", "", s) # strip other HTML tags
+            s = re.sub(r"\n", r"\\N", s) # convert newlines
             return s
 
         subs.events = [SSAEvent(start=start, end=end, text=prepare_text(lines))
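Likely relevant context for the doubled backslashes: on Python 3.7+, re.sub() treats an unrecognized escape such as \i in the replacement template as an error ("bad escape"), and a doubled backslash in the template emits a single literal backslash in the output. A small sketch of the resulting behavior, including the new keep_unknown_html_tags switch (reduced to the italics rule only):

    import re

    def prepare_text(s, keep_unknown_html_tags=False):
        # "\\i1" in the template yields a literal "\i1" in the output; a bare
        # "\i1" raises re.error("bad escape \i") on Python 3.7 and newer.
        s = re.sub(r"< *i *>", r"{\\i1}", s)
        s = re.sub(r"< */ *i *>", r"{\\i0}", s)
        if not keep_unknown_html_tags:
            s = re.sub(r"< */? *[a-zA-Z][^>]*>", "", s)  # strip other HTML tags
        return s

    print(prepare_text("<i>Hi</i> <sub>there</sub>"))
    # {\i1}Hi{\i0} there
    print(prepare_text("<i>Hi</i> <sub>there</sub>", keep_unknown_html_tags=True))
    # {\i1}Hi{\i0} <sub>there</sub>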
@@ -145,7 +145,12 @@ class SubstationFormat(FormatBase):
 
     def string_to_field(f, v):
         if f in {"start", "end"}:
-            return timestamp_to_ms(TIMESTAMP.match(v).groups())
+            if v.startswith("-"):
+                # handle negative timestamps
+                v = v[1:]
+                return -timestamp_to_ms(TIMESTAMP.match(v).groups())
+            else:
+                return timestamp_to_ms(TIMESTAMP.match(v).groups())
         elif "color" in f:
             if format_ == "ass":
                 return ass_rgba_to_color(v)
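A worked example of the new negative-timestamp branch: strip the sign, parse the rest, negate the result. TIMESTAMP and timestamp_to_ms below are simplified stand-ins for the real ones in pysubs2.time:

    import re

    TIMESTAMP = re.compile(r"(\d{1,2}):(\d{2}):(\d{2})[.,](\d{2})")  # h:mm:ss.cs

    def timestamp_to_ms(groups):
        h, m, s, cs = map(int, groups)
        return ((h * 60 + m) * 60 + s) * 1000 + cs * 10

    def parse(v):
        if v.startswith("-"):
            return -timestamp_to_ms(TIMESTAMP.match(v[1:]).groups())
        return timestamp_to_ms(TIMESTAMP.match(v).groups())

    print(parse("0:00:05.00"))    # 5000
    print(parse("-0:00:05.00"))   # -5000; previously TIMESTAMP.match() returned
                                  # None here and .groups() raised AttributeError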
@@ -184,22 +189,22 @@ class SubstationFormat(FormatBase):
             elif inside_info_section or inside_aegisub_section:
                 if line.startswith(";"): continue # skip comments
                 try:
-                    k, v = line.split(": ", 1)
+                    k, v = line.split(":", 1)
                     if inside_info_section:
-                        subs.info[k] = v
+                        subs.info[k] = v.strip()
                     elif inside_aegisub_section:
-                        subs.aegisub_project[k] = v
+                        subs.aegisub_project[k] = v.strip()
                 except ValueError:
                     pass
             elif line.startswith("Style:"):
-                _, rest = line.split(": ", 1)
+                _, rest = line.split(":", 1)
                 buf = rest.strip().split(",")
                 name, raw_fields = buf[0], buf[1:] # splat workaround for Python 2.7
                 field_dict = {f: string_to_field(f, v) for f, v in zip(STYLE_FIELDS[format_], raw_fields)}
                 sty = SSAStyle(**field_dict)
                 subs.styles[name] = sty
             elif line.startswith("Dialogue:") or line.startswith("Comment:"):
-                ev_type, rest = line.split(": ", 1)
+                ev_type, rest = line.split(":", 1)
                 raw_fields = rest.strip().split(",", len(EVENT_FIELDS[format_])-1)
                 field_dict = {f: string_to_field(f, v) for f, v in zip(EVENT_FIELDS[format_], raw_fields)}
                 field_dict["type"] = ev_type
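The split(": ", 1) → split(":", 1) plus strip() change makes section parsing tolerant of lines with no space after the colon; previously such lines raised ValueError (silently skipped in the branch wrapped in try/except, a crash elsewhere). For instance:

    line = "Title:Some Movie"      # no space after the colon

    # old: line.split(": ", 1) returns one element, so unpacking raises
    # ValueError: not enough values to unpack (expected 2, got 1)

    # new: split on the colon alone, then trim the value
    k, v = line.split(":", 1)
    print(k, v.strip())            # Title Some Movie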
@@ -49,6 +49,20 @@ def timestamp_to_ms(groups):
     ms += h * 3600000
     return ms
 
+def tmptimestamp_to_ms(groups):
+    """
+    Convert groups from :data:`pysubs2.time.TMPTIMESTAMP` match to milliseconds.
+
+    Example:
+        >>> timestamp_to_ms(TIMESTAMP.match("0:00:01").groups())
+        1000
+
+    """
+    h, m, s = map(int, groups)
+    ms = s * 1000
+    ms += m * 60000
+    ms += h * 3600000
+    return ms
 def times_to_ms(h=0, m=0, s=0, ms=0):
     """
     Convert hours, minutes, seconds to milliseconds.
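tmptimestamp_to_ms is straight arithmetic on the (h, m, s) match groups; TMP timestamps carry no fractional part, hence no milliseconds term. For example, for "0:01:30":

    h, m, s = 0, 1, 30                         # groups of a TMP timestamp "0:01:30"
    print(s * 1000 + m * 60000 + h * 3600000)  # 90000 ms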
@@ -1,45 +0,0 @@
-# coding=utf-8
-
-from __future__ import print_function, division, unicode_literals
-import re
-from numbers import Number
-
-from pysubs2.time import times_to_ms
-from .formatbase import FormatBase
-from .ssaevent import SSAEvent
-from .ssastyle import SSAStyle
-
-
-# thanks to http://otsaloma.io/gaupol/doc/api/aeidon.files.mpl2_source.html
-MPL2_FORMAT = re.compile(r"^(?um)\[(-?\d+)\]\[(-?\d+)\](.*?)$")
-
-
-class TXTGenericFormat(FormatBase):
-    @classmethod
-    def guess_format(cls, text):
-        if MPL2_FORMAT.match(text):
-            return "mpl2"
-
-
-class MPL2Format(FormatBase):
-    @classmethod
-    def guess_format(cls, text):
-        return TXTGenericFormat.guess_format(text)
-
-    @classmethod
-    def from_file(cls, subs, fp, format_, **kwargs):
-        def prepare_text(lines):
-            out = []
-            for s in lines.split("|"):
-                if s.startswith("/"):
-                    out.append(r"{\i1}%s{\i0}" % s[1:])
-                    continue
-                out.append(s)
-            return "\n".join(out)
-
-        subs.events = [SSAEvent(start=times_to_ms(s=float(start) / 10), end=times_to_ms(s=float(end) / 10),
-                                text=prepare_text(text)) for start, end, text in MPL2_FORMAT.findall(fp.getvalue())]
-
-    @classmethod
-    def to_file(cls, subs, fp, format_, **kwargs):
-        raise NotImplemented
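For reference, the deleted module parsed MPL2, where times are deciseconds and a leading slash marks an italic line. A standalone sketch of the same logic (inline regex flags moved to the front of the pattern, which Python 3.11+ requires):

    import re

    MPL2_FORMAT = re.compile(r"(?um)^\[(-?\d+)\]\[(-?\d+)\](.*?)$")

    def mpl2_to_events(text):
        events = []
        for start, end, body in MPL2_FORMAT.findall(text):
            lines = []
            for part in body.split("|"):          # "|" separates on-screen lines
                if part.startswith("/"):          # leading "/" means italics
                    part = r"{\i1}%s{\i0}" % part[1:]
                lines.append(part)
            # deciseconds -> milliseconds
            events.append((int(start) * 100, int(end) * 100, "\n".join(lines)))
        return events

    print(mpl2_to_events("[100][150]/Hello|world"))
    # [(10000, 15000, '{\\i1}Hello{\\i0}\nworld')]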
@@ -854,8 +854,8 @@ def save_subtitles(file_path, subtitles, single=False, directory=None, chmod=Non
         logger.debug(u"Saving %r to %r", subtitle, subtitle_path)
         content = subtitle.get_modified_content(format=format, debug=debug_mods)
         if content:
-            with open(subtitle_path, 'w') as f:
-                f.write(content.decode('utf-8'))
+            with open(subtitle_path, 'wb') as f:
+                f.write(content)
             subtitle.storage_path = subtitle_path
         else:
             logger.error(u"Something went wrong when getting modified subtitle for %s", subtitle)
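The 'w' → 'wb' switch writes the subtitle bytes exactly as get_modified_content() produced them. Decoding first and re-encoding through a text-mode handle depends on the platform default encoding, which can reject or corrupt non-ASCII subtitles; a small illustration, assuming UTF-8 content:

    content = "Caf\u00e9\n".encode("utf-8")   # bytes, as produced by the pipeline

    # old: text mode re-encodes with the locale encoding (cp1252, ascii, ...)
    with open("subtitle.srt", "w") as f:      # may raise UnicodeEncodeError
        f.write(content.decode("utf-8"))

    # new: the bytes go to disk unchanged
    with open("subtitle.srt", "wb") as f:
        f.write(content)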
@@ -148,7 +148,7 @@ class CFSession(CloudScraper):
             cache_key = "cf_data3_%s" % domain
 
             if not self.cookies.get("cf_clearance", "", domain=domain):
-                cf_data = region.get(cache_key)
+                cf_data = str(region.get(cache_key))
                 if cf_data is not NO_VALUE:
                     cf_cookies, hdrs = cf_data
                     logger.debug("Trying to use old cf data for %s: %s", domain, cf_data)
@@ -165,9 +165,9 @@ class CFSession(CloudScraper):
                     pass
                 else:
                     if cf_data and "cf_clearance" in cf_data[0] and cf_data[0]["cf_clearance"]:
-                        if cf_data != region.get(cache_key):
+                        if cf_data != str(region.get(cache_key)):
                             logger.debug("Storing cf data for %s: %s", domain, cf_data)
-                            region.set(cache_key, cf_data)
+                            region.set(cache_key, bytearray(cf_data, encoding='utf-8'))
                         elif cf_data[0]["cf_clearance"]:
                             logger.debug("CF Live tokens not updated")
 
@@ -257,4 +257,4 @@ def load_verification(site_name, session, callback=lambda x: None):
 
 
 def store_verification(site_name, session):
-    region.set("%s_data" % site_name, session.cookies._cookies, session.headers["User-Agent"])
+    region.set("%s_data" % site_name, (session.cookies._cookies, session.headers["User-Agent"]))
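dogpile.cache's CacheRegion.set() takes one key and one value, so the old three-argument call would raise a TypeError; packing the cookie jar and User-Agent into a tuple stores both under a single key. A sketch of the intended round-trip with an in-memory region (key name illustrative):

    from dogpile.cache import make_region

    region = make_region().configure("dogpile.cache.memory")

    # store_verification packs both values into one tuple
    region.set("example_data", ({"cookies": "..."}, "Mozilla/5.0"))

    # load_verification unpacks them again
    cookies, user_agent = region.get("example_data")
    print(user_agent)   # Mozilla/5.0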
@@ -104,11 +104,11 @@ class Addic7edProvider(_Addic7edProvider):
         tries = 0
         while tries < 3:
             r = self.session.get(self.server_url + 'login.php', timeout=10, headers={"Referer": self.server_url})
-            if "grecaptcha" in r.content:
+            if "grecaptcha" in r.text:
                 logger.info('Addic7ed: Solving captcha. This might take a couple of minutes, but should only '
                             'happen once every so often')
 
-                site_key = re.search(r'grecaptcha.execute\(\'(.+?)\',', r.content).group(1)
+                site_key = re.search(r'grecaptcha.execute\(\'(.+?)\',', r.text).group(1)
                 if not site_key:
                     logger.error("Addic7ed: Captcha site-key not found!")
                     return
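All of the r.content → r.text changes in this file are Python 3 fixes: in requests, Response.content is bytes while Response.text is str, so a str membership test against bytes raises TypeError. Quick demonstration with plain objects:

    content = b"<html>grecaptcha</html>"   # what r.content returns
    text = content.decode("utf-8")         # roughly what r.text returns

    try:
        "grecaptcha" in content
    except TypeError as e:
        print(e)                           # a bytes-like object is required, not 'str'

    print("grecaptcha" in text)            # True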
@@ -127,11 +127,11 @@ class Addic7edProvider(_Addic7edProvider):
             r = self.session.post(self.server_url + 'dologin.php', data, allow_redirects=False, timeout=10,
                                   headers={"Referer": self.server_url + "login.php"})
 
-            if "relax, slow down" in r.content:
+            if "relax, slow down" in r.text:
                 raise TooManyRequests(self.username)
 
             if r.status_code != 302:
-                if "User <b></b> doesn't exist" in r.content and tries <= 2:
+                if "User <b></b> doesn't exist" in r.text and tries <= 2:
                     logger.info("Addic7ed: Error, trying again. (%s/%s)", tries+1, 3)
                     tries += 1
                     continue
@@ -208,8 +208,8 @@ class Addic7edProvider(_Addic7edProvider):
         if show_cells:
             soup = ParserBeautifulSoup(b''.join(show_cells), ['lxml', 'html.parser'])
         else:
-            # If RegEx fails, fall back to original r.content and use 'html.parser'
-            soup = ParserBeautifulSoup(r.content, ['html.parser'])
+            # If RegEx fails, fall back to original r.text and use 'html.parser'
+            soup = ParserBeautifulSoup(r.text, ['html.parser'])
 
         # populate the show ids
         show_ids = {}
@@ -265,7 +265,7 @@ class Addic7edProvider(_Addic7edProvider):
             r = self.session.get(self.server_url + endpoint, params=params, timeout=10, headers=headers)
             r.raise_for_status()
 
-            if r.content and "Sorry, your search" not in r.content:
+            if r.text and "Sorry, your search" not in r.text:
                 break
 
             time.sleep(4)
@@ -273,7 +273,7 @@ class Addic7edProvider(_Addic7edProvider):
         if r.status_code == 304:
             raise TooManyRequests()
 
-        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
+        soup = ParserBeautifulSoup(r.text, ['lxml', 'html.parser'])
 
         suggestion = None
 
@@ -315,13 +315,13 @@ class Addic7edProvider(_Addic7edProvider):
         if r.status_code == 304:
             raise TooManyRequests()
 
-        if not r.content:
+        if not r.text:
             # Provider wrongful return a status of 304 Not Modified with an empty content
             # raise_for_status won't raise exception for that status code
             logger.error('No data returned from provider')
             return []
 
-        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
+        soup = ParserBeautifulSoup(r.text, ['lxml', 'html.parser'])
 
         # loop over subtitle rows
         subtitles = []
@@ -364,7 +364,7 @@ class Addic7edProvider(_Addic7edProvider):
         if r.status_code == 304:
             raise TooManyRequests()
 
-        if not r.content:
+        if not r.text:
             # Provider wrongful return a status of 304 Not Modified with an empty content
             # raise_for_status won't raise exception for that status code
             logger.error('Unable to download subtitle. No data returned from provider')
@@ -116,7 +116,7 @@ class HosszupuskaSubtitle(Subtitle):
         if video.format and self.version and video.format.lower() in self.version.lower():
             matches.add('format')
         # other properties
-        matches |= guess_matches(video, guessit(self.release_info.encode("utf-8")))
+        matches |= guess_matches(video, guessit(self.release_info))
 
         return matches
 
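Same bytes-versus-str cleanup: guessit expects a text string on Python 3, so the explicit .encode("utf-8") is dropped. Roughly:

    from guessit import guessit

    print(guessit("Movie.Name.2018.720p.WEB-DL.x264"))
    # e.g. MatchesDict([('title', 'Movie Name'), ('year', 2018), ...])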
@@ -199,7 +199,7 @@ class LegendasTVProvider(_LegendasTVProvider):
 
             # attempt to get the releases from the cache
             cache_key = releases_key.format(archive_id=a.id, archive_name=a.name)
-            releases = region.get(cache_key, expiration_time=expiration_time)
+            releases = str(region.get(cache_key, expiration_time=expiration_time))
 
             # the releases are not in cache or cache is expired
             if releases == NO_VALUE:
@@ -226,7 +226,7 @@ class LegendasTVProvider(_LegendasTVProvider):
                     releases.append(name)
 
                 # cache the releases
-                region.set(cache_key, releases)
+                region.set(cache_key, bytearray(releases, encoding='utf-8'))
 
             # iterate over releases
             for r in releases:
@@ -158,13 +158,5 @@ class ProviderSubtitleArchiveMixin(object):
         elif subs_fallback:
             matching_sub = subs_fallback[0]
 
-        try:
-            matching_sub_unicode = matching_sub.decode("utf-8")
-        except UnicodeDecodeError:
-            try:
-                matching_sub_unicode = matching_sub.decode("cp437")
-            except UnicodeDecodeError:
-                matching_sub_unicode = matching_sub.decode("utf-8", errors='replace')
-
-        logger.info(u"Using %s from the archive", matching_sub_unicode)
+        logger.info(u"Using %s from the archive", matching_sub)
         return fix_line_ending(archive.read(matching_sub))
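On Python 3 the archive member names returned by zipfile (and rarfile) are already str, so the removed .decode() ladder would fail with AttributeError; the name can be logged as-is. For example:

    import io, zipfile

    buf = io.BytesIO()
    with zipfile.ZipFile(buf, "w") as zf:
        zf.writestr("movie.srt", "1\n00:00:01,000 --> 00:00:02,000\nHi\n")

    with zipfile.ZipFile(buf) as archive:
        name = archive.namelist()[0]        # str on Python 3, no .decode() needed
        print(type(name).__name__, name)    # str movie.srt
        data = archive.read(name)           # the file payload is still bytes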
@@ -141,7 +141,7 @@ class SubsceneProvider(Provider, ProviderSubtitleArchiveMixin):
         logger.info("Creating session")
         self.session = RetryingCFSession()
 
-        prev_cookies = region.get("subscene_cookies2")
+        prev_cookies = str(region.get("subscene_cookies2"))
         if prev_cookies != NO_VALUE:
             logger.debug("Re-using old subscene cookies: %r", prev_cookies)
             self.session.cookies.update(prev_cookies)
@@ -194,7 +194,7 @@ class SubsceneProvider(Provider, ProviderSubtitleArchiveMixin):
                     del cj[cn]
 
                 logger.debug("Storing cookies: %r", cj)
-                region.set("subscene_cookies2", cj)
+                region.set("subscene_cookies2", bytearray(cj, encoding='utf-8'))
                 return
         raise ProviderError("Something went wrong when trying to log in #1")
 
@@ -219,9 +219,9 @@ class SubsceneProvider(Provider, ProviderSubtitleArchiveMixin):
             acc_filters["SelectedIds"] = selected_ids
             self.filters["LanguageFilter"] = ",".join(acc_filters["SelectedIds"])
 
-            last_filters = region.get("subscene_filters")
+            last_filters = str(region.get("subscene_filters"))
             if last_filters != acc_filters:
-                region.set("subscene_filters", acc_filters)
+                region.set("subscene_filters", bytearray(acc_filters, encoding='utf-8'))
                 logger.debug("Setting account filters to %r", acc_filters)
                 self.session.post("https://u.subscene.com/filter", acc_filters, allow_redirects=False)
 