Mirror of https://github.com/morpheus65535/bazarr.git (synced 2025-04-24 06:37:16 -04:00)

Commit: Upgraded GuessIt to 3.0.1
parent 5b44007bbb
commit 376e13d7f1

110 changed files with 10933 additions and 3549 deletions
@@ -918,8 +918,8 @@ def refine_from_db(path, video):
     if int(data['year']) > 0: video.year = int(data['year'])
     video.series_tvdb_id = int(data['tvdbId'])
     video.alternative_series = ast.literal_eval(data['alternateTitles'])
-    if not video.format:
-        video.format = str(data['format'])
+    if not video.source:
+        video.source = str(data['format'])
     if not video.resolution:
         video.resolution = str(data['resolution'])
     if not video.video_codec:
@@ -937,8 +937,8 @@ def refine_from_db(path, video):
     if int(data['year']) > 0: video.year = int(data['year'])
     if data['imdbId']: video.imdb_id = data['imdbId']
     video.alternative_titles = ast.literal_eval(data['alternativeTitles'])
-    if not video.format:
-        if data['format']: video.format = data['format']
+    if not video.source:
+        if data['format']: video.source = data['format']
    if not video.resolution:
        if data['resolution']: video.resolution = data['resolution']
    if not video.video_codec:
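GuessIt 3 renamed the 'format' property to 'source', which is why refine_from_db now assigns video.source while still reading Bazarr's stored 'format' column. A minimal compatibility sketch (the helper name is illustrative, not part of this commit):

    def get_guess_source(guess):
        # GuessIt 2 exposed this value as 'format'; GuessIt 3 calls it 'source'
        return guess.get('source', guess.get('format'))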
@@ -3,7 +3,12 @@
 """
 Extracts as much information as possible from a video file.
 """
+from . import monkeypatch as _monkeypatch
+
 from .api import guessit, GuessItApi
 from .options import ConfigurationException
+from .rules.common.quantity import Size
 
 from .__version__ import __version__
+
+_monkeypatch.monkeypatch_rebulk()
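With the monkeypatch wired in at import time, the public entry point is unchanged. A typical call (filename invented for illustration):

    from guessit import guessit

    guess = guessit('Show.Name.S02E05.1080p.BluRay.x264-GROUP.mkv')
    print(guess['title'], guess['season'], guess['episode'])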
@@ -17,7 +17,13 @@ from rebulk.__version__ import __version__ as __rebulk_version__
 from guessit import api
 from guessit.__version__ import __version__
 from guessit.jsonutils import GuessitEncoder
-from guessit.options import argument_parser, parse_options, load_config
+from guessit.options import argument_parser, parse_options, load_config, merge_options
+
+try:
+    from collections import OrderedDict
+except ImportError:  # pragma: no-cover
+    from ordereddict import OrderedDict  # pylint:disable=import-error
 
 
 def guess_filename(filename, options):
@@ -45,7 +51,7 @@ def guess_filename(filename, options):
     import yaml
     from guessit import yamlutils
 
-    ystr = yaml.dump({filename: dict(guess)}, Dumper=yamlutils.CustomDumper, default_flow_style=False,
+    ystr = yaml.dump({filename: OrderedDict(guess)}, Dumper=yamlutils.CustomDumper, default_flow_style=False,
                      allow_unicode=True)
     i = 0
     for yline in ystr.splitlines():
@@ -91,9 +97,9 @@ def display_properties(options):
         print(4 * ' ' + '[!] %s' % (property_value,))
 
 
-def main(args=None):  # pylint:disable=too-many-branches
+def fix_argv_encoding():
     """
-    Main function for entry point
+    Fix encoding of sys.argv on windows Python 2
     """
     if six.PY2 and os.name == 'nt':  # pragma: no cover
         # see http://bugs.python.org/issue2128
@@ -102,11 +108,21 @@ def main(args=None):  # pylint:disable=too-many-branches
         for i, j in enumerate(sys.argv):
             sys.argv[i] = j.decode(locale.getpreferredencoding())
 
 
+def main(args=None):  # pylint:disable=too-many-branches
+    """
+    Main function for entry point
+    """
+    fix_argv_encoding()
+
     if args is None:  # pragma: no cover
         options = parse_options()
     else:
         options = parse_options(args)
-    options = load_config(options)
+
+    config = load_config(options)
+    options = merge_options(config, options)
 
     if options.get('verbose'):
         logging.basicConfig(stream=sys.stdout, format='%(message)s')
         logging.getLogger().setLevel(logging.DEBUG)
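The CLI now separates loading configuration files from merging them with command-line options. A sketch of the resulting precedence, following the calls shown above:

    from guessit.options import parse_options, load_config, merge_options

    options = parse_options(['-t', 'episode', 'some.file.mkv'])
    config = load_config(options)             # default + user + --config files
    options = merge_options(config, options)  # command-line values win over file values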
@@ -126,7 +142,7 @@ def main(args=None):  # pylint:disable=too-many-branches
 
     if options.get('yaml'):
         try:
-            import yaml  # pylint:disable=unused-variable
+            import yaml  # pylint:disable=unused-variable,unused-import
         except ImportError:  # pragma: no cover
             del options['yaml']
             print('PyYAML is not installed. \'--yaml\' option will be ignored ...', file=sys.stderr)
@@ -4,4 +4,4 @@
 Version module
 """
 # pragma: no cover
-__version__ = '2.1.4'
+__version__ = '3.1.1'
@@ -3,26 +3,28 @@
 """
 API functions that can be used by external software
 """
+try:
+    from collections import OrderedDict
+except ImportError:  # pragma: no-cover
+    from ordereddict import OrderedDict  # pylint:disable=import-error
+
 import os
 import traceback
 
 import six
 
 from rebulk.introspector import introspect
 
-from .rules import rebulk_builder
-from .options import parse_options
 from .__version__ import __version__
+from .options import parse_options, load_config, merge_options
+from .rules import rebulk_builder
 
 
 class GuessitException(Exception):
     """
     Exception raised when guessit fails to perform a guess because of an internal error.
     """
 
     def __init__(self, string, options):
         super(GuessitException, self).__init__("An internal error has occured in guessit.\n"
                                                "===================== Guessit Exception Report =====================\n"
@@ -41,12 +43,27 @@ class GuessitException(Exception):
         self.options = options
 
 
+def configure(options=None, rules_builder=rebulk_builder, force=False):
+    """
+    Load configuration files and initialize rebulk rules if required.
+
+    :param options:
+    :type options: dict
+    :param rules_builder:
+    :type rules_builder:
+    :param force:
+    :type force: bool
+    :return:
+    """
+    default_api.configure(options, rules_builder=rules_builder, force=force)
+
+
 def guessit(string, options=None):
     """
     Retrieves all matches from string as a dict
     :param string: the filename or release name
     :type string: str
-    :param options: the filename or release name
+    :param options:
     :type options: str|dict
     :return:
     :rtype:
@@ -58,65 +75,138 @@ def properties(options=None):
     """
     Retrieves all properties with possible values that can be guessed
     :param options:
-    :type options:
+    :type options: str|dict
     :return:
     :rtype:
     """
     return default_api.properties(options)
 
 
+def suggested_expected(titles, options=None):
+    """
+    Return a list of suggested titles to be used as `expected_title` based on the list of titles
+    :param titles: the filename or release name
+    :type titles: list|set|dict
+    :param options:
+    :type options: str|dict
+    :return:
+    :rtype: list of str
+    """
+    return default_api.suggested_expected(titles, options)
+
+
 class GuessItApi(object):
     """
     An api class that can be configured with custom Rebulk configuration.
     """
 
-    def __init__(self, rebulk):
-        """
-        :param rebulk: Rebulk instance to use.
-        :type rebulk: Rebulk
-        :return:
-        :rtype:
-        """
-        self.rebulk = rebulk
+    def __init__(self):
+        """Default constructor."""
+        self.rebulk = None
+        self.config = None
+        self.load_config_options = None
+        self.advanced_config = None
 
-    @staticmethod
-    def _fix_option_encoding(value):
+    @classmethod
+    def _fix_encoding(cls, value):
         if isinstance(value, list):
-            return [GuessItApi._fix_option_encoding(item) for item in value]
+            return [cls._fix_encoding(item) for item in value]
+        if isinstance(value, dict):
+            return {cls._fix_encoding(k): cls._fix_encoding(v) for k, v in value.items()}
         if six.PY2 and isinstance(value, six.text_type):
-            return value.encode("utf-8")
+            return value.encode('utf-8')
         if six.PY3 and isinstance(value, six.binary_type):
             return value.decode('ascii')
         return value
 
-    def guessit(self, string, options=None):
+    @classmethod
+    def _has_same_properties(cls, dic1, dic2, values):
+        for value in values:
+            if dic1.get(value) != dic2.get(value):
+                return False
+        return True
+
+    def configure(self, options=None, rules_builder=rebulk_builder, force=False, sanitize_options=True):
+        """
+        Load configuration files and initialize rebulk rules if required.
+
+        :param options:
+        :type options: str|dict
+        :param rules_builder:
+        :type rules_builder:
+        :param force:
+        :type force: bool
+        :return:
+        :rtype: dict
+        """
+        if sanitize_options:
+            options = parse_options(options, True)
+            options = self._fix_encoding(options)
+
+        if self.config is None or self.load_config_options is None or force or \
+                not self._has_same_properties(self.load_config_options,
+                                              options,
+                                              ['config', 'no_user_config', 'no_default_config']):
+            config = load_config(options)
+            config = self._fix_encoding(config)
+            self.load_config_options = options
+        else:
+            config = self.config
+
+        advanced_config = merge_options(config.get('advanced_config'), options.get('advanced_config'))
+
+        should_build_rebulk = force or not self.rebulk or not self.advanced_config or \
+                              self.advanced_config != advanced_config
+
+        if should_build_rebulk:
+            self.advanced_config = advanced_config
+            self.rebulk = rules_builder(advanced_config)
+
+        self.config = config
+        return self.config
+
+    def guessit(self, string, options=None):  # pylint: disable=too-many-branches
         """
         Retrieves all matches from string as a dict
         :param string: the filename or release name
-        :type string: str
-        :param options: the filename or release name
+        :type string: str|Path
+        :param options:
         :type options: str|dict
         :return:
         :rtype:
         """
+        try:
+            from pathlib import Path
+            if isinstance(string, Path):
+                try:
+                    # Handle path-like object
+                    string = os.fspath(string)
+                except AttributeError:
+                    string = str(string)
+        except ImportError:
+            pass
+
         try:
+            options = parse_options(options, True)
+            options = self._fix_encoding(options)
+            config = self.configure(options, sanitize_options=False)
+            options = merge_options(config, options)
             result_decode = False
             result_encode = False
-
-            fixed_options = {}
-            for (key, value) in options.items():
-                key = GuessItApi._fix_option_encoding(key)
-                value = GuessItApi._fix_option_encoding(value)
-                fixed_options[key] = value
-            options = fixed_options
-            if six.PY2:
-                if isinstance(string, six.text_type):
-                    string = string.encode("utf-8")
-                    result_decode = True
-                elif isinstance(string, six.binary_type):
-                    string = six.binary_type(string)
-            if six.PY3:
-                if isinstance(string, six.binary_type):
-                    string = string.decode('ascii')
-                    result_encode = True
-                elif isinstance(string, six.text_type):
-                    string = six.text_type(string)
+            if six.PY2 and isinstance(string, six.text_type):
+                string = string.encode("utf-8")
+                result_decode = True
+            if six.PY3 and isinstance(string, six.binary_type):
+                string = string.decode('ascii')
+                result_encode = True
             matches = self.rebulk.matches(string, options)
             if result_decode:
                 for match in matches:
@@ -139,6 +229,10 @@ class GuessItApi(object):
         :return:
         :rtype:
         """
+        options = parse_options(options, True)
+        options = self._fix_encoding(options)
+        config = self.configure(options, sanitize_options=False)
+        options = merge_options(config, options)
         unordered = introspect(self.rebulk, options).properties
         ordered = OrderedDict()
         for k in sorted(unordered.keys(), key=six.text_type):
@@ -147,5 +241,23 @@ class GuessItApi(object):
         ordered = self.rebulk.customize_properties(ordered)
         return ordered
 
+    def suggested_expected(self, titles, options=None):
+        """
+        Return a list of suggested titles to be used as `expected_title` based on the list of titles
+        :param titles: the filename or release name
+        :type titles: list|set|dict
+        :param options:
+        :type options: str|dict
+        :return:
+        :rtype: list of str
+        """
+        suggested = []
+        for title in titles:
+            guess = self.guessit(title, options)
+            if len(guess) != 2 or 'title' not in guess:
+                suggested.append(title)
+        return suggested
+
 
-default_api = GuessItApi(rebulk_builder())
+default_api = GuessItApi()
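GuessItApi.configure() now caches both the merged configuration and the built Rebulk object, rebuilding rules only when the effective advanced_config changes. A rough sketch of the caching behavior implied by this diff (filenames invented):

    from guessit.api import GuessItApi

    api = GuessItApi()
    api.guessit('Movie.2016.720p.mkv')    # first call: loads config, builds rebulk rules
    api.guessit('Other.2017.1080p.mkv')   # same options: cached config and rules are reused
    api.configure({'advanced_config': {'episodes': {'max_range_gap': 2}}}, force=True)  # forces a rebuild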
@@ -4,7 +4,7 @@
 Backports
 """
 # pragma: no-cover
-# pylint: disabled
+# pylint: skip-file
 
 def cmp_to_key(mycmp):
     """functools.cmp_to_key backport"""
@@ -1,5 +1,586 @@
 {
   "expected_title": [
-    "OSS 117"
-  ]
-}
+    "OSS 117",
+    "This is Us"
+  ],
+  "allowed_countries": [
+    "au",
+    "gb",
+    "us"
+  ],
+  "allowed_languages": [
+    "ca",
+    "cs",
+    "de",
+    "en",
+    "es",
+    "fr",
+    "he",
+    "hi",
+    "hu",
+    "it",
+    "ja",
+    "ko",
+    "mul",
+    "nl",
+    "no",
+    "pl",
+    "pt",
+    "ro",
+    "ru",
+    "sv",
+    "te",
+    "uk",
+    "und"
+  ],
+  "advanced_config": {
+    "common_words": [
+      "ca",
+      "cat",
+      "de",
+      "he",
+      "it",
+      "no",
+      "por",
+      "rum",
+      "se",
+      "st",
+      "sub"
+    ],
+    "groups": {
+      "starting": "([{",
+      "ending": ")]}"
+    },
+    "audio_codec": {
+      "audio_channels": {
+        "1.0": [
+          "1ch",
+          "mono"
+        ],
+        "2.0": [
+          "2ch",
+          "stereo",
+          "re:(2[\\W_]0(?:ch)?)(?=[^\\d]|$)"
+        ],
+        "5.1": [
+          "5ch",
+          "6ch",
+          "re:(5[\\W_][01](?:ch)?)(?=[^\\d]|$)",
+          "re:(6[\\W_]0(?:ch)?)(?=[^\\d]|$)"
+        ],
+        "7.1": [
+          "7ch",
+          "8ch",
+          "re:(7[\\W_][01](?:ch)?)(?=[^\\d]|$)"
+        ]
+      }
+    },
+    "container": {
+      "subtitles": [
+        "srt",
+        "idx",
+        "sub",
+        "ssa",
+        "ass"
+      ],
+      "info": [
+        "nfo"
+      ],
+      "videos": [
+        "3g2",
+        "3gp",
+        "3gp2",
+        "asf",
+        "avi",
+        "divx",
+        "flv",
+        "iso",
+        "m4v",
+        "mk2",
+        "mk3d",
+        "mka",
+        "mkv",
+        "mov",
+        "mp4",
+        "mp4a",
+        "mpeg",
+        "mpg",
+        "ogg",
+        "ogm",
+        "ogv",
+        "qt",
+        "ra",
+        "ram",
+        "rm",
+        "ts",
+        "vob",
+        "wav",
+        "webm",
+        "wma",
+        "wmv"
+      ],
+      "torrent": [
+        "torrent"
+      ],
+      "nzb": [
+        "nzb"
+      ]
+    },
+    "country": {
+      "synonyms": {
+        "ES": [
+          "españa"
+        ],
+        "GB": [
+          "UK"
+        ],
+        "BR": [
+          "brazilian",
+          "bra"
+        ],
+        "CA": [
+          "québec",
+          "quebec",
+          "qc"
+        ],
+        "MX": [
+          "Latinoamérica",
+          "latin america"
+        ]
+      }
+    },
+    "episodes": {
+      "season_max_range": 100,
+      "episode_max_range": 100,
+      "max_range_gap": 1,
+      "season_markers": [
+        "s"
+      ],
+      "season_ep_markers": [
+        "x"
+      ],
+      "disc_markers": [
+        "d"
+      ],
+      "episode_markers": [
+        "xe",
+        "ex",
+        "ep",
+        "e",
+        "x"
+      ],
+      "range_separators": [
+        "-",
+        "~",
+        "to",
+        "a"
+      ],
+      "discrete_separators": [
+        "+",
+        "&",
+        "and",
+        "et"
+      ],
+      "season_words": [
+        "season",
+        "saison",
+        "seizoen",
+        "seasons",
+        "saisons",
+        "tem",
+        "temp",
+        "temporada",
+        "temporadas",
+        "stagione"
+      ],
+      "episode_words": [
+        "episode",
+        "episodes",
+        "eps",
+        "ep",
+        "episodio",
+        "episodios",
+        "capitulo",
+        "capitulos"
+      ],
+      "of_words": [
+        "of",
+        "sur"
+      ],
+      "all_words": [
+        "All"
+      ]
+    },
+    "language": {
+      "synonyms": {
+        "ell": [
+          "gr",
+          "greek"
+        ],
+        "spa": [
+          "esp",
+          "español",
+          "espanol"
+        ],
+        "fra": [
+          "français",
+          "vf",
+          "vff",
+          "vfi",
+          "vfq"
+        ],
+        "swe": [
+          "se"
+        ],
+        "por_BR": [
+          "po",
+          "pb",
+          "pob",
+          "ptbr",
+          "br",
+          "brazilian"
+        ],
+        "deu_CH": [
+          "swissgerman",
+          "swiss german"
+        ],
+        "nld_BE": [
+          "flemish"
+        ],
+        "cat": [
+          "català",
+          "castellano",
+          "espanol castellano",
+          "español castellano"
+        ],
+        "ces": [
+          "cz"
+        ],
+        "ukr": [
+          "ua"
+        ],
+        "zho": [
+          "cn"
+        ],
+        "jpn": [
+          "jp"
+        ],
+        "hrv": [
+          "scr"
+        ],
+        "mul": [
+          "multi",
+          "dl"
+        ]
+      },
+      "subtitle_affixes": [
+        "sub",
+        "subs",
+        "esub",
+        "esubs",
+        "subbed",
+        "custom subbed",
+        "custom subs",
+        "custom sub",
+        "customsubbed",
+        "customsubs",
+        "customsub",
+        "soft subtitles",
+        "soft subs"
+      ],
+      "subtitle_prefixes": [
+        "st",
+        "vost",
+        "subforced",
+        "fansub",
+        "hardsub",
+        "legenda",
+        "legendas",
+        "legendado",
+        "subtitulado",
+        "soft",
+        "subtitles"
+      ],
+      "subtitle_suffixes": [
+        "subforced",
+        "fansub",
+        "hardsub"
+      ],
+      "language_affixes": [
+        "dublado",
+        "dubbed",
+        "dub"
+      ],
+      "language_prefixes": [
+        "true"
+      ],
+      "language_suffixes": [
+        "audio"
+      ],
+      "weak_affixes": [
+        "v",
+        "audio",
+        "true"
+      ]
+    },
+    "part": {
+      "prefixes": [
+        "pt",
+        "part"
+      ]
+    },
+    "release_group": {
+      "forbidden_names": [
+        "bonus",
+        "by",
+        "for",
+        "par",
+        "pour",
+        "rip"
+      ],
+      "ignored_seps": "[]{}()"
+    },
+    "screen_size": {
+      "frame_rates": [
+        "23.976",
+        "24",
+        "25",
+        "29.970",
+        "30",
+        "48",
+        "50",
+        "60",
+        "120"
+      ],
+      "min_ar": 1.333,
+      "max_ar": 1.898,
+      "interlaced": [
+        "360",
+        "480",
+        "576",
+        "900",
+        "1080"
+      ],
+      "progressive": [
+        "360",
+        "480",
+        "540",
+        "576",
+        "900",
+        "1080",
+        "368",
+        "720",
+        "1440",
+        "2160",
+        "4320"
+      ]
+    },
+    "website": {
+      "safe_tlds": [
+        "com",
+        "net",
+        "org"
+      ],
+      "safe_subdomains": [
+        "www"
+      ],
+      "safe_prefixes": [
+        "co",
+        "com",
+        "net",
+        "org"
+      ],
+      "prefixes": [
+        "from"
+      ]
+    },
+    "streaming_service": {
+      "A&E": [
+        "AE",
+        "A&E"
+      ],
+      "ABC": "AMBC",
+      "ABC Australia": "AUBC",
+      "Al Jazeera English": "AJAZ",
+      "AMC": "AMC",
+      "Amazon Prime": [
+        "AMZN",
+        "Amazon",
+        "re:Amazon-?Prime"
+      ],
+      "Adult Swim": [
+        "AS",
+        "re:Adult-?Swim"
+      ],
+      "America's Test Kitchen": "ATK",
+      "Animal Planet": "ANPL",
+      "AnimeLab": "ANLB",
+      "AOL": "AOL",
+      "ARD": "ARD",
+      "BBC iPlayer": [
+        "iP",
+        "re:BBC-?iPlayer"
+      ],
+      "BravoTV": "BRAV",
+      "Canal+": "CNLP",
+      "Cartoon Network": "CN",
+      "CBC": "CBC",
+      "CBS": "CBS",
+      "CNBC": "CNBC",
+      "Comedy Central": [
+        "CC",
+        "re:Comedy-?Central"
+      ],
+      "Channel 4": "4OD",
+      "CHRGD": "CHGD",
+      "Cinemax": "CMAX",
+      "Country Music Television": "CMT",
+      "Comedians in Cars Getting Coffee": "CCGC",
+      "Crunchy Roll": [
+        "CR",
+        "re:Crunchy-?Roll"
+      ],
+      "Crackle": "CRKL",
+      "CSpan": "CSPN",
+      "CTV": "CTV",
+      "CuriosityStream": "CUR",
+      "CWSeed": "CWS",
+      "Daisuki": "DSKI",
+      "DC Universe": "DCU",
+      "Deadhouse Films": "DHF",
+      "DramaFever": [
+        "DF",
+        "DramaFever"
+      ],
+      "Digiturk Diledigin Yerde": "DDY",
+      "Discovery": [
+        "DISC",
+        "Discovery"
+      ],
+      "Disney": [
+        "DSNY",
+        "Disney"
+      ],
+      "DIY Network": "DIY",
+      "Doc Club": "DOCC",
+      "DPlay": "DPLY",
+      "E!": "ETV",
+      "ePix": "EPIX",
+      "El Trece": "ETTV",
+      "ESPN": "ESPN",
+      "Esquire": "ESQ",
+      "Family": "FAM",
+      "Family Jr": "FJR",
+      "Food Network": "FOOD",
+      "Fox": "FOX",
+      "Freeform": "FREE",
+      "FYI Network": "FYI",
+      "Global": "GLBL",
+      "GloboSat Play": "GLOB",
+      "Hallmark": "HLMK",
+      "HBO Go": [
+        "HBO",
+        "re:HBO-?Go"
+      ],
+      "HGTV": "HGTV",
+      "History": [
+        "HIST",
+        "History"
+      ],
+      "Hulu": "HULU",
+      "Investigation Discovery": "ID",
+      "IFC": "IFC",
+      "iTunes": "iTunes",
+      "ITV": "ITV",
+      "Knowledge Network": "KNOW",
+      "Lifetime": "LIFE",
+      "Motor Trend OnDemand": "MTOD",
+      "MBC": [
+        "MBC",
+        "MBCVOD"
+      ],
+      "MSNBC": "MNBC",
+      "MTV": "MTV",
+      "National Geographic": [
+        "NATG",
+        "re:National-?Geographic"
+      ],
+      "NBA TV": [
+        "NBA",
+        "re:NBA-?TV"
+      ],
+      "NBC": "NBC",
+      "Netflix": [
+        "NF",
+        "Netflix"
+      ],
+      "NFL": "NFL",
+      "NFL Now": "NFLN",
+      "NHL GameCenter": "GC",
+      "Nickelodeon": [
+        "NICK",
+        "Nickelodeon"
+      ],
+      "Norsk Rikskringkasting": "NRK",
+      "OnDemandKorea": [
+        "ODK",
+        "OnDemandKorea"
+      ],
+      "PBS": "PBS",
+      "PBS Kids": "PBSK",
+      "Playstation Network": "PSN",
+      "Pluzz": "PLUZ",
+      "RTE One": "RTE",
+      "SBS (AU)": "SBS",
+      "SeeSo": [
+        "SESO",
+        "SeeSo"
+      ],
+      "Shomi": "SHMI",
+      "Spike": "SPIK",
+      "Spike TV": [
+        "SPKE",
+        "re:Spike-?TV"
+      ],
+      "Sportsnet": "SNET",
+      "Sprout": "SPRT",
+      "Stan": "STAN",
+      "Starz": "STZ",
+      "Sveriges Television": "SVT",
+      "SwearNet": "SWER",
+      "Syfy": "SYFY",
+      "TBS": "TBS",
+      "TFou": "TFOU",
+      "The CW": [
+        "CW",
+        "re:The-?CW"
+      ],
+      "TLC": "TLC",
+      "TubiTV": "TUBI",
+      "TV3 Ireland": "TV3",
+      "TV4 Sweeden": "TV4",
+      "TVING": "TVING",
+      "TV Land": [
+        "TVL",
+        "re:TV-?Land"
+      ],
+      "UFC": "UFC",
+      "UKTV": "UKTV",
+      "Univision": "UNIV",
+      "USA Network": "USAN",
+      "Velocity": "VLCT",
+      "VH1": "VH1",
+      "Viceland": "VICE",
+      "Viki": "VIKI",
+      "Vimeo": "VMEO",
+      "VRV": "VRV",
+      "W Network": "WNET",
+      "WatchMe": "WME",
+      "WWE Network": "WWEN",
+      "Xbox Video": "XBOX",
+      "Yahoo": "YHOO",
+      "YouTube Red": "RED",
+      "ZDF": "ZDF"
+    }
+  }
+}
@@ -4,14 +4,10 @@
 JSON Utils
 """
 import json
-try:
-    from collections import OrderedDict
-except ImportError:  # pragma: no-cover
-    from ordereddict import OrderedDict  # pylint:disable=import-error
 
+from six import text_type
 from rebulk.match import Match
 
 
 class GuessitEncoder(json.JSONEncoder):
     """
     JSON Encoder for guessit response
@@ -19,14 +15,8 @@ class GuessitEncoder(json.JSONEncoder):
 
     def default(self, o):  # pylint:disable=method-hidden
         if isinstance(o, Match):
-            ret = OrderedDict()
-            ret['value'] = o.value
-            if o.raw:
-                ret['raw'] = o.raw
-            ret['start'] = o.start
-            ret['end'] = o.end
-            return ret
-        elif hasattr(o, 'name'):  # Babelfish languages/countries long name
-            return str(o.name)
-        else:  # pragma: no cover
-            return str(o)
+            return o.advanced
+        if hasattr(o, 'name'):  # Babelfish languages/countries long name
+            return text_type(o.name)
+        # pragma: no cover
+        return text_type(o)
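The encoder now delegates Match serialization to the Match.advanced property added by the monkeypatch module below. A hedged usage sketch, assuming the 'advanced' option keeps Match objects in the result:

    import json
    from guessit import guessit
    from guessit.jsonutils import GuessitEncoder

    guess = guessit('Show.S01E01.720p.HDTV.x264-GRP.mkv', {'advanced': True})
    print(json.dumps(guess, cls=GuessitEncoder, indent=2))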
libs/guessit/monkeypatch.py (new file, 34 lines)
@@ -0,0 +1,34 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+Monkeypatch initialisation functions
+"""
+
+try:
+    from collections import OrderedDict
+except ImportError:  # pragma: no-cover
+    from ordereddict import OrderedDict  # pylint:disable=import-error
+
+from rebulk.match import Match
+
+
+def monkeypatch_rebulk():
+    """Monkeypatch rebulk classes"""
+
+    @property
+    def match_advanced(self):
+        """
+        Build advanced dict from match
+        :param self:
+        :return:
+        """
+
+        ret = OrderedDict()
+        ret['value'] = self.value
+        if self.raw:
+            ret['raw'] = self.raw
+        ret['start'] = self.start
+        ret['end'] = self.end
+        return ret
+
+    Match.advanced = match_advanced
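After monkeypatch_rebulk() runs (guessit/__init__.py invokes it at import time), every rebulk Match exposes the ordered view used by the JSON encoder above. A small illustration, with the example values assumed rather than taken from this commit:

    from guessit.monkeypatch import monkeypatch_rebulk

    monkeypatch_rebulk()
    # for a Match m covering '2016' in 'Movie.2016.1080p.mkv', m.advanced
    # would yield OrderedDict([('value', 2016), ('raw', '2016'), ('start', 6), ('end', 10)])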
@@ -3,10 +3,12 @@
 """
 Options
 """
+import copy
 import json
 import os
+import pkgutil
 import shlex
 
 from argparse import ArgumentParser
 
 import six
@@ -42,6 +44,10 @@ def build_argument_parser():
                              help='Expected title to parse (can be used multiple times)')
     naming_opts.add_argument('-G', '--expected-group', action='append', dest='expected_group', default=None,
                              help='Expected release group (can be used multiple times)')
+    naming_opts.add_argument('--includes', action='append', dest='includes', default=None,
+                             help='List of properties to be detected')
+    naming_opts.add_argument('--excludes', action='append', dest='excludes', default=None,
+                             help='List of properties to be ignored')
 
     input_opts = opts.add_argument_group("Input")
     input_opts.add_argument('-f', '--input-file', dest='input_file', default=None,
@@ -65,14 +71,20 @@ def build_argument_parser():
 
     conf_opts = opts.add_argument_group("Configuration")
     conf_opts.add_argument('-c', '--config', dest='config', action='append', default=None,
-                           help='Filepath to the configuration file. Configuration contains the same options as '
-                                'those command line options, but option names have "-" characters replaced with "_". '
-                                'If not defined, guessit tries to read a configuration default configuration file at '
-                                '~/.guessit/options.(json|yml|yaml) and ~/.config/guessit/options.(json|yml|yaml). '
-                                'Set to "false" to disable default configuration file loading.')
-    conf_opts.add_argument('--no-embedded-config', dest='no_embedded_config', action='store_true',
+                           help='Filepath to configuration file. Configuration file contains the same '
+                                'options as those from command line options, but option names have "-" characters '
+                                'replaced with "_". This configuration will be merged with default and user '
+                                'configuration files.')
+    conf_opts.add_argument('--no-user-config', dest='no_user_config', action='store_true',
                            default=None,
-                           help='Disable default configuration.')
+                           help='Disable user configuration. If not defined, guessit tries to read configuration files '
+                                'at ~/.guessit/options.(json|yml|yaml) and ~/.config/guessit/options.(json|yml|yaml)')
+    conf_opts.add_argument('--no-default-config', dest='no_default_config', action='store_true',
+                           default=None,
+                           help='Disable default configuration. This should be done only if you are providing a full '
+                                'configuration through user configuration or --config option. If no "advanced_config" '
+                                'is provided by another configuration file, it will still be loaded from default '
+                                'configuration.')
 
     information_opts = opts.add_argument_group("Information")
     information_opts.add_argument('-p', '--properties', dest='properties', action='store_true', default=None,
@@ -92,7 +104,7 @@ def parse_options(options=None, api=False):
     :param options:
     :type options:
     :param api
-    :type boolean
+    :type api: boolean
     :return:
     :rtype:
     """
@@ -116,93 +128,113 @@ class ConfigurationException(Exception):
     """
     Exception related to configuration file.
     """
-    pass
+    pass  # pylint:disable=unnecessary-pass
 
 
 def load_config(options):
     """
-    Load configuration from configuration file, if defined.
+    Load options from configuration files, if defined and present.
     :param options:
     :type options:
     :return:
     :rtype:
     """
-    config_files_enabled = True
-    custom_config_files = None
-    if options.get('config') is not None:
-        custom_config_files = options.get('config')
-        if not custom_config_files \
-                or not custom_config_files[0] \
-                or custom_config_files[0].lower() in ['0', 'no', 'false', 'disabled']:
-            config_files_enabled = False
-
     configurations = []
-    if config_files_enabled:
+
+    if not options.get('no_default_config'):
+        default_options_data = pkgutil.get_data('guessit', 'config/options.json').decode('utf-8')
+        default_options = json.loads(default_options_data)
+        configurations.append(default_options)
+
+    config_files = []
+
+    if not options.get('no_user_config'):
         home_directory = os.path.expanduser("~")
         cwd = os.getcwd()
         yaml_supported = False
         try:
-            import yaml  # pylint: disable=unused-variable
+            import yaml  # pylint:disable=unused-variable,unused-import
             yaml_supported = True
         except ImportError:
             pass
-        config_file_locations = get_config_file_locations(home_directory, cwd, yaml_supported)
+
+        config_file_locations = get_options_file_locations(home_directory, cwd, yaml_supported)
         config_files = [f for f in config_file_locations if os.path.exists(f)]
 
-        if custom_config_files:
-            config_files = config_files + custom_config_files
+    custom_config_files = options.get('config')
+    if custom_config_files:
+        config_files = config_files + custom_config_files
 
-        for config_file in config_files:
-            config_file_options = load_config_file(config_file)
-            if config_file_options:
-                configurations.append(config_file_options)
-
-    if not options.get('no_embedded_config'):
-        embedded_options_data = pkgutil.get_data('guessit', 'config/options.json').decode("utf-8")
-        embedded_options = json.loads(embedded_options_data)
-        configurations.append(embedded_options)
+    for config_file in config_files:
+        config_file_options = load_config_file(config_file)
+        if config_file_options:
+            configurations.append(config_file_options)
 
+    config = {}
     if configurations:
-        configurations.append(options)
-        return merge_configurations(*configurations)
+        config = merge_options(*configurations)
 
-    return options
+    if 'advanced_config' not in config:
+        # Guessit doesn't work without advanced_config, so we use default if no configuration files provides it.
+        default_options_data = pkgutil.get_data('guessit', 'config/options.json').decode('utf-8')
+        default_options = json.loads(default_options_data)
+        config['advanced_config'] = default_options['advanced_config']
+
+    return config
 
 
-def merge_configurations(*configurations):
+def merge_options(*options):
     """
-    Merge configurations into a single options dict.
-    :param configurations:
-    :type configurations:
+    Merge options into a single options dict.
+    :param options:
+    :type options:
     :return:
     :rtype:
     """
-
     merged = {}
+    if options:
+        if options[0]:
+            merged.update(copy.deepcopy(options[0]))
 
-    for options in configurations:
-        pristine = options.get('pristine')
+        for options in options[1:]:
+            if options:
+                pristine = options.get('pristine')
 
-        if pristine:
-            if pristine is True:
-                merged = {}
-            else:
-                for to_reset in pristine:
-                    if to_reset in merged:
-                        del merged[to_reset]
+                if pristine is True:
+                    merged = {}
+                elif pristine:
+                    for to_reset in pristine:
+                        if to_reset in merged:
+                            del merged[to_reset]
 
-        for (option, value) in options.items():
-            if value is not None and option != 'pristine':
-                if option in merged.keys() and isinstance(merged[option], list):
-                    merged[option].extend(value)
-                elif isinstance(value, list):
-                    merged[option] = list(value)
-                else:
-                    merged[option] = value
+                for (option, value) in options.items():
+                    merge_option_value(option, value, merged)
 
     return merged
 
 
+def merge_option_value(option, value, merged):
+    """
+    Merge option value
+    :param option:
+    :param value:
+    :param merged:
+    :return:
+    """
+    if value is not None and option != 'pristine':
+        if option in merged.keys() and isinstance(merged[option], list):
+            for val in value:
+                if val not in merged[option]:
+                    merged[option].append(val)
+        elif option in merged.keys() and isinstance(merged[option], dict):
+            merged[option] = merge_options(merged[option], value)
+        elif isinstance(value, list):
+            merged[option] = list(value)
+        else:
+            merged[option] = value
+
+
 def load_config_file(filepath):
     """
     Load a configuration as an options dict.
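merge_options() layers dicts left to right: a 'pristine' key in a later dict resets earlier values, lists are merged without duplicates, and nested dicts recurse through merge_option_value(). A quick illustration of the behavior implemented above:

    from guessit.options import merge_options

    base = {'type': 'episode', 'expected_title': ['OSS 117']}
    override = {'expected_title': ['This is Us'], 'single_value': True}
    merged = merge_options(base, override)
    # merged['expected_title'] == ['OSS 117', 'This is Us']
    # merged['type'] == 'episode' and merged['single_value'] is True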
@@ -220,17 +252,24 @@ def load_config_file(filepath):
         try:
             import yaml
             with open(filepath) as config_file_data:
-                return yaml.load(config_file_data)
+                return yaml.load(config_file_data, yaml.SafeLoader)
         except ImportError:  # pragma: no cover
             raise ConfigurationException('Configuration file extension is not supported. '
                                          'PyYAML should be installed to support "%s" file' % (
                                              filepath,))
 
+    try:
+        # Try to load input as JSON
+        return json.loads(filepath)
+    except:  # pylint: disable=bare-except
+        pass
+
     raise ConfigurationException('Configuration file extension is not supported for "%s" file.' % (filepath,))
 
 
-def get_config_file_locations(homedir, cwd, yaml_supported=False):
+def get_options_file_locations(homedir, cwd, yaml_supported=False):
     """
-    Get all possible locations for configuration file.
+    Get all possible locations for options file.
     :param homedir: user home directory
     :type homedir: basestring
     :param cwd: current working directory
@@ -10,7 +10,7 @@ from .markers.groups import groups
 from .properties.episodes import episodes
 from .properties.container import container
-from .properties.format import format_
+from .properties.source import source
 from .properties.video_codec import video_codec
 from .properties.audio_codec import audio_codec
 from .properties.screen_size import screen_size
@@ -24,6 +24,7 @@ from .properties.release_group import release_group
 from .properties.streaming_service import streaming_service
 from .properties.other import other
 from .properties.size import size
+from .properties.bit_rate import bit_rate
 from .properties.edition import edition
 from .properties.cds import cds
 from .properties.bonus import bonus
@@ -36,44 +37,50 @@ from .properties.type import type_
 from .processors import processors
 
 
-def rebulk_builder():
+def rebulk_builder(config):
     """
     Default builder for main Rebulk object used by api.
     :return: Main Rebulk object
     :rtype: Rebulk
     """
+    def _config(name):
+        return config.get(name, {})
+
     rebulk = Rebulk()
 
-    rebulk.rebulk(path())
-    rebulk.rebulk(groups())
+    common_words = frozenset(_config('common_words'))
 
-    rebulk.rebulk(episodes())
-    rebulk.rebulk(container())
-    rebulk.rebulk(format_())
-    rebulk.rebulk(video_codec())
-    rebulk.rebulk(audio_codec())
-    rebulk.rebulk(screen_size())
-    rebulk.rebulk(website())
-    rebulk.rebulk(date())
-    rebulk.rebulk(title())
-    rebulk.rebulk(episode_title())
-    rebulk.rebulk(language())
-    rebulk.rebulk(country())
-    rebulk.rebulk(release_group())
-    rebulk.rebulk(streaming_service())
-    rebulk.rebulk(other())
-    rebulk.rebulk(size())
-    rebulk.rebulk(edition())
-    rebulk.rebulk(cds())
-    rebulk.rebulk(bonus())
-    rebulk.rebulk(film())
-    rebulk.rebulk(part())
-    rebulk.rebulk(crc())
+    rebulk.rebulk(path(_config('path')))
+    rebulk.rebulk(groups(_config('groups')))
 
-    rebulk.rebulk(processors())
+    rebulk.rebulk(episodes(_config('episodes')))
+    rebulk.rebulk(container(_config('container')))
+    rebulk.rebulk(source(_config('source')))
+    rebulk.rebulk(video_codec(_config('video_codec')))
+    rebulk.rebulk(audio_codec(_config('audio_codec')))
+    rebulk.rebulk(screen_size(_config('screen_size')))
+    rebulk.rebulk(website(_config('website')))
+    rebulk.rebulk(date(_config('date')))
+    rebulk.rebulk(title(_config('title')))
+    rebulk.rebulk(episode_title(_config('episode_title')))
+    rebulk.rebulk(language(_config('language'), common_words))
+    rebulk.rebulk(country(_config('country'), common_words))
+    rebulk.rebulk(release_group(_config('release_group')))
+    rebulk.rebulk(streaming_service(_config('streaming_service')))
+    rebulk.rebulk(other(_config('other')))
+    rebulk.rebulk(size(_config('size')))
+    rebulk.rebulk(bit_rate(_config('bit_rate')))
+    rebulk.rebulk(edition(_config('edition')))
+    rebulk.rebulk(cds(_config('cds')))
+    rebulk.rebulk(bonus(_config('bonus')))
+    rebulk.rebulk(film(_config('film')))
+    rebulk.rebulk(part(_config('part')))
+    rebulk.rebulk(crc(_config('crc')))
 
-    rebulk.rebulk(mimetype())
-    rebulk.rebulk(type_())
+    rebulk.rebulk(processors(_config('processors')))
+
+    rebulk.rebulk(mimetype(_config('mimetype')))
+    rebulk.rebulk(type_(_config('type')))
 
     def customize_properties(properties):
         """
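Every property builder now receives its slice of advanced_config, so rule behavior is data-driven instead of hard-coded. Assuming the configure()/merge_options() path shown earlier also honors per-call overrides, tweaking one knob could look like:

    from guessit import guessit

    options = {'advanced_config': {'episodes': {'max_range_gap': 2}}}
    print(guessit('Show.S01E01-E04.mkv', options))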
@@ -13,9 +13,12 @@ def marker_comparator_predicate(match):
     """
     Match predicate used in comparator
     """
-    return not match.private and \
-           match.name not in ['proper_count', 'title', 'episode_title', 'alternative_title'] and \
-           not (match.name == 'container' and 'extension' in match.tags)
+    return (
+        not match.private
+        and match.name not in ('proper_count', 'title')
+        and not (match.name == 'container' and 'extension' in match.tags)
+        and not (match.name == 'other' and match.value == 'Rip')
+    )
 
 
 def marker_weight(matches, marker, predicate):
@@ -50,9 +53,8 @@ def marker_comparator(matches, markers, predicate):
         matches_count = marker_weight(matches, marker2, predicate) - marker_weight(matches, marker1, predicate)
         if matches_count:
             return matches_count
-        len_diff = len(marker2) - len(marker1)
-        if len_diff:
-            return len_diff
-        return markers.index(marker2) - markers.index(marker1)
+
+        # give preference to rightmost path
+        return markers.index(marker2) - markers.index(marker1)
 
     return comparator
@@ -42,7 +42,7 @@ def _is_int(string):
         return False
 
 
-def _guess_day_first_parameter(groups):
+def _guess_day_first_parameter(groups):  # pylint:disable=inconsistent-return-statements
     """
     If day_first is not defined, use some heuristic to fix it.
     It helps to solve issues with python dateutils 2.5.3 parser changes.
@@ -57,17 +57,17 @@ def _guess_day_first_parameter(groups):  # pylint:disable=inconsistent-return-statements
     if _is_int(groups[0]) and valid_year(int(groups[0][:4])):
         return False
     # If match ends with a long year, the day_first is forced to true.
-    elif _is_int(groups[-1]) and valid_year(int(groups[-1][-4:])):
+    if _is_int(groups[-1]) and valid_year(int(groups[-1][-4:])):
         return True
     # If match starts with a short year, then day_first is force to false.
-    elif _is_int(groups[0]) and int(groups[0][:2]) > 31:
+    if _is_int(groups[0]) and int(groups[0][:2]) > 31:
         return False
     # If match ends with a short year, then day_first is force to true.
-    elif _is_int(groups[-1]) and int(groups[-1][-2:]) > 31:
+    if _is_int(groups[-1]) and int(groups[-1][-2:]) > 31:
         return True
 
 
-def search_date(string, year_first=None, day_first=None):
+def search_date(string, year_first=None, day_first=None):  # pylint:disable=inconsistent-return-statements
     """Looks for date patterns, and if found return the date and group span.
 
     Assumes there are sentinels at the beginning and end of the string that
@@ -25,7 +25,7 @@ def _potential_before(i, input_string):
     :return:
     :rtype: bool
     """
-    return i - 2 >= 0 and input_string[i] == input_string[i - 2] and input_string[i - 1] not in seps
+    return i - 1 >= 0 and input_string[i] in seps and input_string[i - 2] in seps and input_string[i - 1] not in seps
 
 
 def _potential_after(i, input_string):
libs/guessit/rules/common/pattern.py (new file, 27 lines)
@@ -0,0 +1,27 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+Pattern utility functions
+"""
+
+
+def is_disabled(context, name):
+    """Whether a specific pattern is disabled.
+
+    The context object might define an inclusion list (includes) or an exclusion list (excludes).
+    A pattern is considered disabled if it's found in the exclusion list or
+    it's not found in the inclusion list and the inclusion list is not empty or not defined.
+
+    :param context:
+    :param name:
+    :return:
+    """
+    if not context:
+        return False
+
+    excludes = context.get('excludes')
+    if excludes and name in excludes:
+        return True
+
+    includes = context.get('includes')
+    return includes and name not in includes
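is_disabled() backs the new --includes/--excludes options added in options.py. Straightforward usage of the function as written:

    from guessit.rules.common.pattern import is_disabled

    is_disabled({'includes': ['season', 'episode']}, 'audio_codec')  # True: not in the inclusion list
    is_disabled({'includes': ['season', 'episode']}, 'season')       # False
    is_disabled({'excludes': ['crc32']}, 'crc32')                    # True: explicitly excluded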
libs/guessit/rules/common/quantity.py (new file, 106 lines)
@@ -0,0 +1,106 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+Quantities: Size
+"""
+import re
+from abc import abstractmethod
+
+import six
+
+from ..common import seps
+
+
+class Quantity(object):
+    """
+    Represent a quantity object with magnitude and units.
+    """
+
+    parser_re = re.compile(r'(?P<magnitude>\d+(?:[.]\d+)?)(?P<units>[^\d]+)?')
+
+    def __init__(self, magnitude, units):
+        self.magnitude = magnitude
+        self.units = units
+
+    @classmethod
+    @abstractmethod
+    def parse_units(cls, value):
+        """
+        Parse a string to a proper unit notation.
+        """
+        raise NotImplementedError
+
+    @classmethod
+    def fromstring(cls, string):
+        """
+        Parse the string into a quantity object.
+        :param string:
+        :return:
+        """
+        values = cls.parser_re.match(string).groupdict()
+        try:
+            magnitude = int(values['magnitude'])
+        except ValueError:
+            magnitude = float(values['magnitude'])
+        units = cls.parse_units(values['units'])
+
+        return cls(magnitude, units)
+
+    def __hash__(self):
+        return hash(str(self))
+
+    def __eq__(self, other):
+        if isinstance(other, six.string_types):
+            return str(self) == other
+        if not isinstance(other, self.__class__):
+            return NotImplemented
+        return self.magnitude == other.magnitude and self.units == other.units
+
+    def __ne__(self, other):
+        return not self == other
+
+    def __repr__(self):
+        return '<{0} [{1}]>'.format(self.__class__.__name__, self)
+
+    def __str__(self):
+        return '{0}{1}'.format(self.magnitude, self.units)
+
+
+class Size(Quantity):
+    """
+    Represent size.
+
+    e.g.: 1.1GB, 300MB
+    """
+
+    @classmethod
+    def parse_units(cls, value):
+        return value.strip(seps).upper()
+
+
+class BitRate(Quantity):
+    """
+    Represent bit rate.
+
+    e.g.: 320Kbps, 1.5Mbps
+    """
+
+    @classmethod
+    def parse_units(cls, value):
+        value = value.strip(seps).capitalize()
+        for token in ('bits', 'bit'):
+            value = value.replace(token, 'bps')
+
+        return value
+
+
+class FrameRate(Quantity):
+    """
+    Represent frame rate.
+
+    e.g.: 24fps, 60fps
+    """
+
+    @classmethod
+    def parse_units(cls, value):
+        return 'fps'
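Size is what guessit/__init__.py now re-exports; quantities parse a magnitude plus a unit and compare equal to their string form. For example:

    from guessit.rules.common.quantity import Size

    size = Size.fromstring('1.1GB')
    print(size.magnitude, size.units)  # 1.1 GB
    print(size == '1.1GB')             # True: equality also accepts strings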
@@ -28,7 +28,7 @@ def int_coercable(string):
         return False
 
 
-def compose(*validators):
+def and_(*validators):
     """
     Compose validators functions
     :param validators:
@@ -49,3 +49,26 @@ def and_(*validators):
             return False
         return True
     return composed
+
+
+def or_(*validators):
+    """
+    Compose validators functions
+    :param validators:
+    :type validators:
+    :return:
+    :rtype:
+    """
+    def composed(string):
+        """
+        Composed validators function
+        :param string:
+        :type string:
+        :return:
+        :rtype:
+        """
+        for validator in validators:
+            if validator(string):
+                return True
+        return False
+    return composed
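compose() is renamed to and_() and gains an or_() counterpart, so validators can be combined either way. A sketch using the separator validators imported elsewhere in this diff:

    from guessit.rules.common.validators import and_, or_, seps_before, seps_after

    both = and_(seps_before, seps_after)    # accept only when every validator passes
    either = or_(seps_before, seps_after)   # accept when at least one passes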
@@ -32,48 +32,3 @@ def iter_words(string):
         i += 1
     if inside_word:
         yield _Word(span=(last_sep_index+1, i), value=string[last_sep_index+1:i])
-
-
-# list of common words which could be interpreted as properties, but which
-# are far too common to be able to say they represent a property in the
-# middle of a string (where they most likely carry their commmon meaning)
-COMMON_WORDS = frozenset([
-    # english words
-    'is', 'it', 'am', 'mad', 'men', 'man', 'run', 'sin', 'st', 'to',
-    'no', 'non', 'war', 'min', 'new', 'car', 'day', 'bad', 'bat', 'fan',
-    'fry', 'cop', 'zen', 'gay', 'fat', 'one', 'cherokee', 'got', 'an', 'as',
-    'cat', 'her', 'be', 'hat', 'sun', 'may', 'my', 'mr', 'rum', 'pi', 'bb',
-    'bt', 'tv', 'aw', 'by', 'md', 'mp', 'cd', 'lt', 'gt', 'in', 'ad', 'ice',
-    'ay', 'at', 'star', 'so', 'he', 'do', 'ax', 'mx',
-    # french words
-    'bas', 'de', 'le', 'son', 'ne', 'ca', 'ce', 'et', 'que',
-    'mal', 'est', 'vol', 'or', 'mon', 'se', 'je', 'tu', 'me',
-    'ne', 'ma', 'va', 'au', 'lu',
-    # japanese words,
-    'wa', 'ga', 'ao',
-    # spanish words
-    'la', 'el', 'del', 'por', 'mar', 'al',
-    # italian words
-    'un',
-    # other
-    'ind', 'arw', 'ts', 'ii', 'bin', 'chan', 'ss', 'san', 'oss', 'iii',
-    'vi', 'ben', 'da', 'lt', 'ch', 'sr', 'ps', 'cx', 'vo',
-    # new from babelfish
-    'mkv', 'avi', 'dmd', 'the', 'dis', 'cut', 'stv', 'des', 'dia', 'and',
-    'cab', 'sub', 'mia', 'rim', 'las', 'une', 'par', 'srt', 'ano', 'toy',
-    'job', 'gag', 'reel', 'www', 'for', 'ayu', 'csi', 'ren', 'moi', 'sur',
-    'fer', 'fun', 'two', 'big', 'psy', 'air',
-    # movie title
-    'brazil', 'jordan',
-    # release groups
-    'bs',  # Bosnian
-    'kz',
-    # countries
-    'gt', 'lt', 'im',
-    # part/pt
-    'pt',
-    # screener
-    'scr',
-    # quality
-    'sd', 'hr'
-])
@@ -6,17 +6,20 @@ Groups markers (...), [...] and {...}
 from rebulk import Rebulk
 
 
-def groups():
+def groups(config):
     """
     Builder for rebulk object.
+
+    :param config: rule configuration
+    :type config: dict
     :return: Created Rebulk object
     :rtype: Rebulk
     """
     rebulk = Rebulk()
     rebulk.defaults(name="group", marker=True)
 
-    starting = '([{'
-    ending = ')]}'
+    starting = config['starting']
+    ending = config['ending']
 
     def mark_groups(input_string):
         """
@@ -8,9 +8,12 @@ from rebulk import Rebulk
 from rebulk.utils import find_all
 
 
-def path():
+def path(config):  # pylint:disable=unused-argument
     """
     Builder for rebulk object.
+
+    :param config: rule configuration
+    :type config: dict
     :return: Created Rebulk object
     :rtype: Rebulk
     """
@@ -22,6 +25,7 @@ def path(config):  # pylint:disable=unused-argument
         Functional pattern to mark path elements.
 
         :param input_string:
+        :param context:
         :return:
         """
         ret = []
libs/guessit/rules/match_processors.py (new file, 20 lines)
@@ -0,0 +1,20 @@
+"""
+Match processors
+"""
+from guessit.rules.common import seps
+
+
+def strip(match, chars=seps):
+    """
+    Strip given characters from match.
+
+    :param chars:
+    :param match:
+    :return:
+    """
+    while match.input_string[match.start] in chars:
+        match.start += 1
+    while match.input_string[match.end - 1] in chars:
+        match.end -= 1
+    if not match:
+        return False
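strip() narrows a match span in place and returns False when nothing remains, so rules can use it directly as a removal signal. A sketch, assuming rebulk's Match accepts input_string as a keyword argument:

    from rebulk.match import Match
    from guessit.rules.match_processors import strip

    m = Match(5, 12, input_string='Movie..1080p..mkv')  # span covers '..1080p'
    strip(m)
    print(m.input_string[m.start:m.end])  # '1080p'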
@@ -36,6 +36,7 @@ class EnlargeGroupMatches(CustomRule):
 
         if starting or ending:
             return starting, ending
+        return False
 
     def then(self, matches, when_response, context):
         starting, ending = when_response
@@ -193,6 +194,23 @@ class SeasonYear(Rule):
         return ret
 
 
+class YearSeason(Rule):
+    """
+    If a year is found, no season found, and episode is found, create an match with season.
+    """
+    priority = POST_PROCESS
+    consequence = AppendMatch
+
+    def when(self, matches, context):
+        ret = []
+        if not matches.named('season') and matches.named('episode'):
+            for year in matches.named('year'):
+                season = copy.copy(year)
+                season.name = 'season'
+                ret.append(season)
+        return ret
+
+
 class Processors(CustomRule):
     """
     Empty rule for ordering post_processing properly.
@@ -226,13 +244,16 @@ class StripSeparators(CustomRule):
                 match.raw_end -= 1
 
 
-def processors():
+def processors(config):  # pylint:disable=unused-argument
     """
     Builder for rebulk object.
+
+    :param config: rule configuration
+    :type config: dict
     :return: Created Rebulk object
     :rtype: Rebulk
     """
     return Rebulk().rules(EnlargeGroupMatches, EquivalentHoles,
                           RemoveLessSpecificSeasonEpisode('season'),
                           RemoveLessSpecificSeasonEpisode('episode'),
-                          RemoveAmbiguous, SeasonYear, Processors, StripSeparators)
+                          RemoveAmbiguous, SeasonYear, YearSeason, Processors, StripSeparators)
@@ -3,22 +3,28 @@
 """
 audio_codec, audio_profile and audio_channels property
 """
-from rebulk import Rebulk, Rule, RemoveMatch
 from rebulk.remodule import re
 
+from rebulk import Rebulk, Rule, RemoveMatch
 from ..common import dash
+from ..common.pattern import is_disabled
+from ..common.validators import seps_before, seps_after
 
 audio_properties = ['audio_codec', 'audio_profile', 'audio_channels']
 
 
-def audio_codec():
+def audio_codec(config):  # pylint:disable=unused-argument
     """
     Builder for rebulk object.
+
+    :param config: rule configuration
+    :type config: dict
     :return: Created Rebulk object
     :rtype: Rebulk
     """
-    rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
+    rebulk = Rebulk()\
+        .regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])\
+        .string_defaults(ignore_case=True)
 
     def audio_codec_priority(match1, match2):
         """
@@ -36,37 +42,53 @@ def audio_codec():
return match1
return '__default__'

rebulk.defaults(name="audio_codec", conflict_solver=audio_codec_priority)
rebulk.defaults(name='audio_codec',
conflict_solver=audio_codec_priority,
disabled=lambda context: is_disabled(context, 'audio_codec'))

rebulk.regex("MP3", "LAME", r"LAME(?:\d)+-?(?:\d)+", value="MP3")
rebulk.regex('Dolby', 'DolbyDigital', 'Dolby-Digital', 'DD', 'AC3D?', value='AC3')
rebulk.regex("DolbyAtmos", "Dolby-Atmos", "Atmos", value="DolbyAtmos")
rebulk.string("MP2", value="MP2")
rebulk.regex('Dolby', 'DolbyDigital', 'Dolby-Digital', 'DD', 'AC3D?', value='Dolby Digital')
rebulk.regex('Dolby-?Atmos', 'Atmos', value='Dolby Atmos')
rebulk.string("AAC", value="AAC")
rebulk.string('EAC3', 'DDP', 'DD+', value="EAC3")
rebulk.string('EAC3', 'DDP', 'DD+', value='Dolby Digital Plus')
rebulk.string("Flac", value="FLAC")
rebulk.string("DTS", value="DTS")
rebulk.regex("True-?HD", value="TrueHD")
rebulk.regex('DTS-?HD', 'DTS(?=-?MA)', value='DTS-HD',
conflict_solver=lambda match, other: other if other.name == 'audio_codec' else '__default__')
rebulk.regex('True-?HD', value='Dolby TrueHD')
rebulk.string('Opus', value='Opus')
rebulk.string('Vorbis', value='Vorbis')
rebulk.string('PCM', value='PCM')
rebulk.string('LPCM', value='LPCM')

rebulk.defaults(name="audio_profile")
rebulk.string("HD", value="HD", tags="DTS")
rebulk.regex("HD-?MA", value="HDMA", tags="DTS")
rebulk.string("HE", value="HE", tags="AAC")
rebulk.string("LC", value="LC", tags="AAC")
rebulk.string("HQ", value="HQ", tags="AC3")
rebulk.defaults(clear=True,
name='audio_profile',
disabled=lambda context: is_disabled(context, 'audio_profile'))
rebulk.string('MA', value='Master Audio', tags=['audio_profile.rule', 'DTS-HD'])
rebulk.string('HR', 'HRA', value='High Resolution Audio', tags=['audio_profile.rule', 'DTS-HD'])
rebulk.string('ES', value='Extended Surround', tags=['audio_profile.rule', 'DTS'])
rebulk.string('HE', value='High Efficiency', tags=['audio_profile.rule', 'AAC'])
rebulk.string('LC', value='Low Complexity', tags=['audio_profile.rule', 'AAC'])
rebulk.string('HQ', value='High Quality', tags=['audio_profile.rule', 'Dolby Digital'])
rebulk.string('EX', value='EX', tags=['audio_profile.rule', 'Dolby Digital'])

rebulk.defaults(name="audio_channels")
rebulk.regex(r'(7[\W_][01](?:ch)?)(?:[^\d]|$)', value='7.1', children=True)
rebulk.regex(r'(5[\W_][01](?:ch)?)(?:[^\d]|$)', value='5.1', children=True)
rebulk.regex(r'(2[\W_]0(?:ch)?)(?:[^\d]|$)', value='2.0', children=True)
rebulk.defaults(clear=True,
name="audio_channels",
disabled=lambda context: is_disabled(context, 'audio_channels'))
rebulk.regex('7[01]', value='7.1', validator=seps_after, tags='weak-audio_channels')
rebulk.regex('5[01]', value='5.1', validator=seps_after, tags='weak-audio_channels')
rebulk.string('20', value='2.0', validator=seps_after, tags='weak-audio_channels')
rebulk.string('7ch', '8ch', value='7.1')
rebulk.string('5ch', '6ch', value='5.1')
rebulk.string('2ch', 'stereo', value='2.0')
rebulk.string('1ch', 'mono', value='1.0')

rebulk.rules(DtsRule, AacRule, Ac3Rule, AudioValidatorRule, HqConflictRule, AudioChannelsValidatorRule)
for value, items in config.get('audio_channels').items():
for item in items:
if item.startswith('re:'):
rebulk.regex(item[3:], value=value, children=True)
else:
rebulk.string(item, value=value)

rebulk.rules(DtsHDRule, DtsRule, AacRule, DolbyDigitalRule, AudioValidatorRule, HqConflictRule,
AudioChannelsValidatorRule)

return rebulk

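For illustration, the renames above show up directly in guess results (a hedged sketch; the exact keys depend on the full release name):

>>> from guessit import guessit
>>> guessit('Show.S01E01.1080p.DDP5.1.x264-GRP.mkv')
# 'DDP' now yields audio_codec == 'Dolby Digital Plus' (previously 'EAC3'),
# and '5.1' yields audio_channels == '5.1'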
@@ -111,25 +133,49 @@ class AudioProfileRule(Rule):
super(AudioProfileRule, self).__init__()
self.codec = codec

def enabled(self, context):
return not is_disabled(context, 'audio_profile')

def when(self, matches, context):
profile_list = matches.named('audio_profile', lambda match: self.codec in match.tags)
profile_list = matches.named('audio_profile',
lambda match: 'audio_profile.rule' in match.tags and
self.codec in match.tags)
ret = []
for profile in profile_list:
codec = matches.previous(profile, lambda match: match.name == 'audio_codec' and match.value == self.codec)
codec = matches.at_span(profile.span,
lambda match: match.name == 'audio_codec' and
match.value == self.codec, 0)
if not codec:
codec = matches.next(profile, lambda match: match.name == 'audio_codec' and match.value == self.codec)
codec = matches.previous(profile,
lambda match: match.name == 'audio_codec' and
match.value == self.codec)
if not codec:
codec = matches.next(profile,
lambda match: match.name == 'audio_codec' and
match.value == self.codec)
if not codec:
ret.append(profile)
if codec:
ret.extend(matches.conflicting(profile))
return ret

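In practice the rewritten when() keeps a profile token only if its codec is found at the same span, just before, or just after it (a hedged example; the expected value assumes the tags defined above):

>>> guessit('Movie.2016.BluRay.DTS-HD.MA.7.1.x264-GRP.mkv')
# 'MA' survives as audio_profile == 'Master Audio' because an adjacent
# audio_codec match with value 'DTS-HD' is found; without it, the rule
# appends the profile match to the removal list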

class DtsHDRule(AudioProfileRule):
"""
Rule to validate DTS-HD profile
"""

def __init__(self):
super(DtsHDRule, self).__init__('DTS-HD')


class DtsRule(AudioProfileRule):
"""
Rule to validate DTS profile
"""

def __init__(self):
super(DtsRule, self).__init__("DTS")
super(DtsRule, self).__init__('DTS')


class AacRule(AudioProfileRule):

@@ -138,16 +184,16 @@ class AacRule(AudioProfileRule):
"""

def __init__(self):
super(AacRule, self).__init__("AAC")
super(AacRule, self).__init__('AAC')


class Ac3Rule(AudioProfileRule):
class DolbyDigitalRule(AudioProfileRule):
"""
Rule to validate AC3 profile
Rule to validate Dolby Digital profile
"""

def __init__(self):
super(Ac3Rule, self).__init__("AC3")
super(DolbyDigitalRule, self).__init__('Dolby Digital')


class HqConflictRule(Rule):

@@ -155,16 +201,16 @@ class HqConflictRule(Rule):
Solve conflict between HQ from other property and from audio_profile.
"""

dependency = [DtsRule, AacRule, Ac3Rule]
dependency = [DtsHDRule, DtsRule, AacRule, DolbyDigitalRule]
consequence = RemoveMatch

def when(self, matches, context):
hq_audio = matches.named('audio_profile', lambda match: match.value == 'HQ')
hq_audio_spans = [match.span for match in hq_audio]
hq_other = matches.named('other', lambda match: match.span in hq_audio_spans)
def enabled(self, context):
return not is_disabled(context, 'audio_profile')

if hq_other:
return hq_other
def when(self, matches, context):
hq_audio = matches.named('audio_profile', lambda m: m.value == 'High Quality')
hq_audio_spans = [match.span for match in hq_audio]
return matches.named('other', lambda m: m.span in hq_audio_spans)


class AudioChannelsValidatorRule(Rule):

@@ -174,6 +220,9 @@ class AudioChannelsValidatorRule(Rule):
priority = 128
consequence = RemoveMatch

def enabled(self, context):
return not is_disabled(context, 'audio_channels')

def when(self, matches, context):
ret = []

libs/guessit/rules/properties/bit_rate.py (new file, 74 lines)
@@ -0,0 +1,74 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
video_bit_rate and audio_bit_rate properties
"""
import re

from rebulk import Rebulk
from rebulk.rules import Rule, RemoveMatch, RenameMatch

from ..common import dash, seps
from ..common.pattern import is_disabled
from ..common.quantity import BitRate
from ..common.validators import seps_surround


def bit_rate(config):  # pylint:disable=unused-argument
"""
Builder for rebulk object.

:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk(disabled=lambda context: (is_disabled(context, 'audio_bit_rate')
and is_disabled(context, 'video_bit_rate')))
rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
rebulk.defaults(name='audio_bit_rate', validator=seps_surround)
rebulk.regex(r'\d+-?[kmg]b(ps|its?)', r'\d+\.\d+-?[kmg]b(ps|its?)',
conflict_solver=(
lambda match, other: match
if other.name == 'audio_channels' and 'weak-audio_channels' not in other.tags
else other
),
formatter=BitRate.fromstring, tags=['release-group-prefix'])

rebulk.rules(BitRateTypeRule)

return rebulk


class BitRateTypeRule(Rule):
"""
Convert audio bit rate guess into video bit rate.
"""
consequence = [RenameMatch('video_bit_rate'), RemoveMatch]

def when(self, matches, context):
to_rename = []
to_remove = []

if is_disabled(context, 'audio_bit_rate'):
to_remove.extend(matches.named('audio_bit_rate'))
else:
video_bit_rate_disabled = is_disabled(context, 'video_bit_rate')
for match in matches.named('audio_bit_rate'):
previous = matches.previous(match, index=0,
predicate=lambda m: m.name in ('source', 'screen_size', 'video_codec'))
if previous and not matches.holes(previous.end, match.start, predicate=lambda m: m.value.strip(seps)):
after = matches.next(match, index=0, predicate=lambda m: m.name == 'audio_codec')
if after and not matches.holes(match.end, after.start, predicate=lambda m: m.value.strip(seps)):
bitrate = match.value
if bitrate.units == 'Kbps' or (bitrate.units == 'Mbps' and bitrate.magnitude < 10):
continue

if video_bit_rate_disabled:
to_remove.append(match)
else:
to_rename.append(match)

if to_rename or to_remove:
return to_rename, to_remove
return False
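The renaming logic is easiest to see on a concrete name (hedged; assumes the default rule set shipped with this commit):

>>> guessit('Movie.2018.1080p.BluRay.24Mbps.x264.DTS-GRP.mkv')
# '24Mbps' is first matched as audio_bit_rate, but since it sits between
# screen_size/source matches and the 'DTS' audio_codec with no hole in
# between, and is >= 10 Mbps, BitRateTypeRule renames it to video_bit_rate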
libs/guessit/rules/properties/bonus.py
@@ -9,21 +9,27 @@ from rebulk import Rebulk, AppendMatch, Rule

from .title import TitleFromPosition
from ..common.formatters import cleanup
from ..common.pattern import is_disabled
from ..common.validators import seps_surround


def bonus():
def bonus(config):  # pylint:disable=unused-argument
"""
Builder for rebulk object.

:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE)
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'bonus'))
rebulk = rebulk.regex_defaults(flags=re.IGNORECASE)

rebulk.regex(r'x(\d+)', name='bonus', private_parent=True, children=True, formatter=int,
validator={'__parent__': lambda match: seps_surround},
validator={'__parent__': seps_surround},
validate_all=True,
conflict_solver=lambda match, conflicting: match
if conflicting.name in ['video_codec', 'episode'] and 'bonus-conflict' not in conflicting.tags
if conflicting.name in ('video_codec', 'episode') and 'weak-episode' not in conflicting.tags
else '__default__')

rebulk.rules(BonusTitleRule)

@@ -40,7 +46,7 @@ class BonusTitleRule(Rule):

properties = {'bonus_title': [None]}

def when(self, matches, context):
def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
bonus_number = matches.named('bonus', lambda match: not match.private, index=0)
if bonus_number:
filepath = matches.markers.at_match(bonus_number, lambda marker: marker.name == 'path', 0)

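A hedged sketch of the bonus pattern (the release name is illustrative, not from the test suite):

>>> guessit('Movie.2004.Making.Of.x02.DVDRip.mkv')
# 'x02' matches r'x(\d+)' and should give bonus == 2; BonusTitleRule then
# tries to derive bonus_title from the surrounding hole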
libs/guessit/rules/properties/cd.py
@@ -6,16 +6,22 @@ cd and cd_count properties
from rebulk.remodule import re

from rebulk import Rebulk

from ..common import dash
from ..common.pattern import is_disabled


def cds():
def cds(config):  # pylint:disable=unused-argument
"""
Builder for rebulk object.

:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'cd'))
rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])

rebulk.regex(r'cd-?(?P<cd>\d+)(?:-?of-?(?P<cd_count>\d+))?',
validator={'cd': lambda match: 0 < match.value < 100,

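The cd pattern is self-describing; a quick hedged check:

>>> guessit('Movie.1999.CD1of2.DivX-GRP.avi')
# expected from the regex above: cd == 1, cd_count == 2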
libs/guessit/rules/properties/container.py
@@ -8,33 +8,35 @@ from rebulk.remodule import re
from rebulk import Rebulk

from ..common import seps
from ..common.pattern import is_disabled
from ..common.validators import seps_surround
from ...reutils import build_or_pattern


def container():
def container(config):
"""
Builder for rebulk object.

:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'container'))
rebulk = rebulk.regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
rebulk.defaults(name='container',
formatter=lambda value: value.strip(seps),
tags=['extension'],
conflict_solver=lambda match, other: other
if other.name in ['format', 'video_codec'] or
if other.name in ('source', 'video_codec') or
other.name == 'container' and 'extension' not in other.tags
else '__default__')

subtitles = ['srt', 'idx', 'sub', 'ssa', 'ass']
info = ['nfo']
videos = ['3g2', '3gp', '3gp2', 'asf', 'avi', 'divx', 'flv', 'm4v', 'mk2',
'mka', 'mkv', 'mov', 'mp4', 'mp4a', 'mpeg', 'mpg', 'ogg', 'ogm',
'ogv', 'qt', 'ra', 'ram', 'rm', 'ts', 'wav', 'webm', 'wma', 'wmv',
'iso', 'vob']
torrent = ['torrent']
nzb = ['nzb']
subtitles = config['subtitles']
info = config['info']
videos = config['videos']
torrent = config['torrent']
nzb = config['nzb']

rebulk.regex(r'\.'+build_or_pattern(subtitles)+'$', exts=subtitles, tags=['extension', 'subtitle'])
rebulk.regex(r'\.'+build_or_pattern(info)+'$', exts=info, tags=['extension', 'info'])

@@ -42,15 +44,16 @@ def container():
rebulk.regex(r'\.'+build_or_pattern(torrent)+'$', exts=torrent, tags=['extension', 'torrent'])
rebulk.regex(r'\.'+build_or_pattern(nzb)+'$', exts=nzb, tags=['extension', 'nzb'])

rebulk.defaults(name='container',
rebulk.defaults(clear=True,
name='container',
validator=seps_surround,
formatter=lambda s: s.lower(),
conflict_solver=lambda match, other: match
if other.name in ['format',
'video_codec'] or other.name == 'container' and 'extension' in other.tags
if other.name in ('source',
'video_codec') or other.name == 'container' and 'extension' in other.tags
else '__default__')

rebulk.string(*[sub for sub in subtitles if sub not in ['sub']], tags=['subtitle'])
rebulk.string(*[sub for sub in subtitles if sub not in ('sub', 'ass')], tags=['subtitle'])
rebulk.string(*videos, tags=['video'])
rebulk.string(*torrent, tags=['torrent'])
rebulk.string(*nzb, tags=['nzb'])

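Extension handling keeps working as before, only the lists move to configuration (hedged example):

>>> guessit('Show.S01E01.720p.mkv')
# the trailing '.mkv' matches the extension pattern and is reported as
# container == 'mkv' (lowercased by the formatter)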
libs/guessit/rules/properties/country.py
@@ -7,41 +7,50 @@ country property
import babelfish

from rebulk import Rebulk
from ..common.words import COMMON_WORDS, iter_words
from ..common.pattern import is_disabled
from ..common.words import iter_words


def country():
def country(config, common_words):
"""
Builder for rebulk object.

:param config: rule configuration
:type config: dict
:param common_words: common words
:type common_words: set
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().defaults(name='country')
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'country'))
rebulk = rebulk.defaults(name='country')

def find_countries(string, context=None):
"""
Find countries in given string.
"""
allowed_countries = context.get('allowed_countries') if context else None
return CountryFinder(allowed_countries, common_words).find(string)

rebulk.functional(find_countries,
# Prefer language and any other property over country if not US or GB.
conflict_solver=lambda match, other: match
if other.name != 'language' or match.value not in [babelfish.Country('US'),
babelfish.Country('GB')]
if other.name != 'language' or match.value not in (babelfish.Country('US'),
babelfish.Country('GB'))
else other,
properties={'country': [None]})
properties={'country': [None]},
disabled=lambda context: not context.get('allowed_countries'))

babelfish.country_converters['guessit'] = GuessitCountryConverter(config['synonyms'])

return rebulk


COUNTRIES_SYN = {'ES': ['españa'],
'GB': ['UK'],
'BR': ['brazilian', 'bra'],
'CA': ['québec', 'quebec', 'qc'],
# FIXME: this one is a bit of a stretch, not sure how to do it properly, though...
'MX': ['Latinoamérica', 'latin america']}


class GuessitCountryConverter(babelfish.CountryReverseConverter):  # pylint: disable=missing-docstring
def __init__(self):
def __init__(self, synonyms):
self.guessit_exceptions = {}

for alpha2, synlist in COUNTRIES_SYN.items():
for alpha2, synlist in synonyms.items():
for syn in synlist:
self.guessit_exceptions[syn.lower()] = alpha2

@@ -78,32 +87,28 @@ class GuessitCountryConverter(babelfish.CountryReverseConverter):  # pylint: disable=missing-docstring
raise babelfish.CountryReverseError(name)


babelfish.country_converters['guessit'] = GuessitCountryConverter()
class CountryFinder(object):
"""Helper class to search and return country matches."""

def __init__(self, allowed_countries, common_words):
self.allowed_countries = {l.lower() for l in allowed_countries or []}
self.common_words = common_words

def is_allowed_country(country_object, context=None):
"""
Check if country is allowed.
"""
if context and context.get('allowed_countries'):
allowed_countries = context.get('allowed_countries')
return country_object.name.lower() in allowed_countries or country_object.alpha2.lower() in allowed_countries
return True
def find(self, string):
"""Return all matches for country."""
for word_match in iter_words(string.strip().lower()):
word = word_match.value
if word.lower() in self.common_words:
continue

try:
country_object = babelfish.Country.fromguessit(word)
if (country_object.name.lower() in self.allowed_countries or
country_object.alpha2.lower() in self.allowed_countries):
yield self._to_rebulk_match(word_match, country_object)
except babelfish.Error:
continue

def find_countries(string, context=None):
"""
Find countries in given string.
"""
ret = []
for word_match in iter_words(string.strip().lower()):
word = word_match.value
if word.lower() in COMMON_WORDS:
continue
try:
country_object = babelfish.Country.fromguessit(word)
if is_allowed_country(country_object, context):
ret.append((word_match.span[0], word_match.span[1], {'value': country_object}))
except babelfish.Error:
continue
return ret
@classmethod
def _to_rebulk_match(cls, word, value):
return word.span[0], word.span[1], {'value': value}

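Country matching is now effectively opt-in, since the functional pattern is disabled without allowed_countries (hedged example):

>>> guessit('Shameless.US.S01E01.720p.mkv', {'allowed_countries': ['us']})
# 'US' resolves through babelfish and is kept because it is allowed;
# without the option, no country match is produced at all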
libs/guessit/rules/properties/crc.py
@@ -6,20 +6,25 @@ crc and uuid properties
from rebulk.remodule import re

from rebulk import Rebulk
from ..common.pattern import is_disabled
from ..common.validators import seps_surround


def crc():
def crc(config):  # pylint:disable=unused-argument
"""
Builder for rebulk object.

:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE)
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'crc32'))
rebulk = rebulk.regex_defaults(flags=re.IGNORECASE)
rebulk.defaults(validator=seps_surround)

rebulk.regex('(?:[a-fA-F]|[0-9]){8}', name='crc32',
conflict_solver=lambda match, other: match
conflict_solver=lambda match, other: other
if other.name in ['episode', 'season']
else '__default__')

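The conflict solver now yields to episode/season instead of winning; the pattern itself is unchanged (hedged check):

>>> guessit('[Group].Show.-.01.[2F5C3D8E].mkv')
# the eight hex digits '2F5C3D8E' match the crc32 pattern above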
libs/guessit/rules/properties/date.py
@@ -6,21 +6,29 @@ date and year properties
from rebulk import Rebulk, RemoveMatch, Rule

from ..common.date import search_date, valid_year
from ..common.pattern import is_disabled
from ..common.validators import seps_surround


def date():
def date(config):  # pylint:disable=unused-argument
"""
Builder for rebulk object.

:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().defaults(validator=seps_surround)

rebulk.regex(r"\d{4}", name="year", formatter=int,
disabled=lambda context: is_disabled(context, 'year'),
conflict_solver=lambda match, other: other
if other.name in ('episode', 'season') and len(other.raw) < len(match.raw)
else '__default__',
validator=lambda match: seps_surround(match) and valid_year(match.value))

def date_functional(string, context):
def date_functional(string, context):  # pylint:disable=inconsistent-return-statements
"""
Search for date in the string and retrieves match


@@ -33,8 +41,9 @@ def date():
return ret[0], ret[1], {'value': ret[2]}

rebulk.functional(date_functional, name="date", properties={'date': [None]},
disabled=lambda context: is_disabled(context, 'date'),
conflict_solver=lambda match, other: other
if other.name in ['episode', 'season']
if other.name in ('episode', 'season', 'crc32')
else '__default__')

rebulk.rules(KeepMarkedYearInFilepart)

@@ -49,6 +58,9 @@ class KeepMarkedYearInFilepart(Rule):
priority = 64
consequence = RemoveMatch

def enabled(self, context):
return not is_disabled(context, 'year')

def when(self, matches, context):
ret = []
if len(matches.named('year')) > 1:

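A hedged example of the year rules:

>>> guessit('Show.2018.S01E01.1080p.mkv')
# '2018' passes valid_year and is reported as year == 2018; a four-digit
# token outside the plausible range would be rejected by valid_year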
libs/guessit/rules/properties/edition.py
@@ -7,28 +7,34 @@ from rebulk.remodule import re

from rebulk import Rebulk
from ..common import dash
from ..common.pattern import is_disabled
from ..common.validators import seps_surround


def edition():
def edition(config):  # pylint:disable=unused-argument
"""
Builder for rebulk object.

:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'edition'))
rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
rebulk.defaults(name='edition', validator=seps_surround)

rebulk.regex('collector', 'collector-edition', 'edition-collector', value='Collector Edition')
rebulk.regex('special-edition', 'edition-special', value='Special Edition',
rebulk.regex('collector', "collector'?s?-edition", 'edition-collector', value='Collector')
rebulk.regex('special-edition', 'edition-special', value='Special',
conflict_solver=lambda match, other: other
if other.name == 'episode_details' and other.value == 'Special'
else '__default__')
rebulk.string('se', value='Special Edition', tags='has-neighbor')
rebulk.regex('criterion-edition', 'edition-criterion', value='Criterion Edition')
rebulk.regex('deluxe', 'deluxe-edition', 'edition-deluxe', value='Deluxe Edition')
rebulk.regex('limited', 'limited-edition', value='Limited Edition', tags=['has-neighbor', 'release-group-prefix'])
rebulk.regex(r'theatrical-cut', r'theatrical-edition', r'theatrical', value='Theatrical Edition')
rebulk.string('se', value='Special', tags='has-neighbor')
rebulk.string('ddc', value="Director's Definitive Cut")
rebulk.regex('criterion-edition', 'edition-criterion', 'CC', value='Criterion')
rebulk.regex('deluxe', 'deluxe-edition', 'edition-deluxe', value='Deluxe')
rebulk.regex('limited', 'limited-edition', value='Limited', tags=['has-neighbor', 'release-group-prefix'])
rebulk.regex(r'theatrical-cut', r'theatrical-edition', r'theatrical', value='Theatrical')
rebulk.regex(r"director'?s?-cut", r"director'?s?-cut-edition", r"edition-director'?s?-cut", 'DC',
value="Director's Cut")
rebulk.regex('extended', 'extended-?cut', 'extended-?version',

@@ -37,5 +43,10 @@ def edition():
for value in ('Remastered', 'Uncensored', 'Uncut', 'Unrated'):
rebulk.string(value, value=value, tags=['has-neighbor', 'release-group-prefix'])
rebulk.string('Festival', value='Festival', tags=['has-neighbor-before', 'has-neighbor-after'])
rebulk.regex('imax', 'imax-edition', value='IMAX')
rebulk.regex('fan-edit(?:ion)?', 'fan-collection', value='Fan')
rebulk.regex('ultimate-edition', value='Ultimate')
rebulk.regex("ultimate-collector'?s?-edition", value=['Ultimate', 'Collector'])
rebulk.regex('ultimate-fan-edit(?:ion)?', 'ultimate-fan-collection', value=['Ultimate', 'Fan'])

return rebulk

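Edition values are shortened ('Special Edition' becomes 'Special') and may now be lists (hedged example):

>>> guessit('Movie.1982.Ultimate.Collectors.Edition.BluRay.mkv')
# the combined pattern above is expected to yield
# edition == ['Ultimate', 'Collector']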
libs/guessit/rules/properties/episode_title.py
@@ -9,26 +9,32 @@ from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch, RenameMatch, POST_PROCESS

from ..common import seps, title_seps
from ..common.formatters import cleanup
from ..common.pattern import is_disabled
from ..common.validators import or_
from ..properties.title import TitleFromPosition, TitleBaseRule
from ..properties.type import TypeProcessor


def episode_title():
def episode_title(config):  # pylint:disable=unused-argument
"""
Builder for rebulk object.

:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
previous_names = ('episode', 'episode_details', 'episode_count',
previous_names = ('episode', 'episode_count',
'season', 'season_count', 'date', 'title', 'year')

rebulk = Rebulk().rules(RemoveConflictsWithEpisodeTitle(previous_names),
EpisodeTitleFromPosition(previous_names),
AlternativeTitleReplace(previous_names),
TitleToEpisodeTitle,
Filepart3EpisodeTitle,
Filepart2EpisodeTitle,
RenameEpisodeTitleWhenMovieType)
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'episode_title'))
rebulk = rebulk.rules(RemoveConflictsWithEpisodeTitle(previous_names),
EpisodeTitleFromPosition(previous_names),
AlternativeTitleReplace(previous_names),
TitleToEpisodeTitle,
Filepart3EpisodeTitle,
Filepart2EpisodeTitle,
RenameEpisodeTitleWhenMovieType)
return rebulk


@@ -43,7 +49,7 @@ class RemoveConflictsWithEpisodeTitle(Rule):
def __init__(self, previous_names):
super(RemoveConflictsWithEpisodeTitle, self).__init__()
self.previous_names = previous_names
self.next_names = ('streaming_service', 'screen_size', 'format',
self.next_names = ('streaming_service', 'screen_size', 'source',
'video_codec', 'audio_codec', 'other', 'container')
self.affected_if_holes_after = ('part', )
self.affected_names = ('part', 'year')

@@ -53,13 +59,11 @@ class RemoveConflictsWithEpisodeTitle(Rule):
for filepart in matches.markers.named('path'):
for match in matches.range(filepart.start, filepart.end,
predicate=lambda m: m.name in self.affected_names):
before = matches.previous(match, index=0,
predicate=lambda m, fp=filepart: not m.private and m.start >= fp.start)
before = matches.range(filepart.start, match.start, predicate=lambda m: not m.private, index=-1)
if not before or before.name not in self.previous_names:
continue

after = matches.next(match, index=0,
predicate=lambda m, fp=filepart: not m.private and m.end <= fp.end)
after = matches.range(match.end, filepart.end, predicate=lambda m: not m.private, index=0)
if not after or after.name not in self.next_names:
continue

@@ -100,16 +104,15 @@ class TitleToEpisodeTitle(Rule):
for title in titles:
title_groups[title.value].append(title)

if len(title_groups) < 2:
return

episode_titles = []
if len(title_groups) < 2:
return episode_titles

for title in titles:
if matches.previous(title, lambda match: match.name == 'episode'):
episode_titles.append(title)

if episode_titles:
return episode_titles
return episode_titles

def then(self, matches, when_response, context):
for title in when_response:

@@ -131,8 +134,7 @@ class EpisodeTitleFromPosition(TitleBaseRule):

def hole_filter(self, hole, matches):
episode = matches.previous(hole,
lambda previous: any(name in previous.names
for name in self.previous_names),
lambda previous: previous.named(*self.previous_names),
0)

crc32 = matches.named('crc32')

@@ -150,7 +152,7 @@ class EpisodeTitleFromPosition(TitleBaseRule):
return False
return super(EpisodeTitleFromPosition, self).should_remove(match, matches, filepart, hole, context)

def when(self, matches, context):
def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
if matches.named('episode_title'):
return
return super(EpisodeTitleFromPosition, self).when(matches, context)

@@ -167,7 +169,7 @@ class AlternativeTitleReplace(Rule):
super(AlternativeTitleReplace, self).__init__()
self.previous_names = previous_names

def when(self, matches, context):
def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
if matches.named('episode_title'):
return


@@ -177,8 +179,7 @@ class AlternativeTitleReplace(Rule):
predicate=lambda match: 'title' in match.tags, index=0)
if main_title:
episode = matches.previous(main_title,
lambda previous: any(name in previous.names
for name in self.previous_names),
lambda previous: previous.named(*self.previous_names),
0)

crc32 = matches.named('crc32')

@@ -202,7 +203,7 @@ class RenameEpisodeTitleWhenMovieType(Rule):
dependency = TypeProcessor
consequence = RenameMatch

def when(self, matches, context):
def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
if matches.named('episode_title', lambda m: 'alternative-replaced' not in m.tags) \
and not matches.named('type', lambda m: m.value == 'episode'):
return matches.named('episode_title')

@@ -221,12 +222,18 @@ class Filepart3EpisodeTitle(Rule):
Serie name/SO1/E01-episode_title.mkv
AAAAAAAAAA/BBB/CCCCCCCCCCCCCCCCCCCC

Serie name/SO1/episode_title-E01.mkv
AAAAAAAAAA/BBB/CCCCCCCCCCCCCCCCCCCC

If CCCC contains episode and BBB contains seasonNumber
Then title is to be found in AAAA.
"""
consequence = AppendMatch('title')

def when(self, matches, context):
def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
if matches.tagged('filepart-title'):
return

fileparts = matches.markers.named('path')
if len(fileparts) < 3:
return

@@ -241,6 +248,7 @@ class Filepart3EpisodeTitle(Rule):

if season:
hole = matches.holes(subdirectory.start, subdirectory.end,
ignore=or_(lambda match: 'weak-episode' in match.tags, TitleBaseRule.is_ignored),
formatter=cleanup, seps=title_seps, predicate=lambda match: match.value,
index=0)
if hole:

@@ -267,7 +275,10 @@ class Filepart2EpisodeTitle(Rule):
"""
consequence = AppendMatch('title')

def when(self, matches, context):
def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
if matches.tagged('filepart-title'):
return

fileparts = matches.markers.named('path')
if len(fileparts) < 2:
return

@@ -280,7 +291,10 @@ class Filepart2EpisodeTitle(Rule):
season = (matches.range(directory.start, directory.end, lambda match: match.name == 'season', 0) or
matches.range(filename.start, filename.end, lambda match: match.name == 'season', 0))
if season:
hole = matches.holes(directory.start, directory.end, formatter=cleanup, seps=title_seps,
hole = matches.holes(directory.start, directory.end,
ignore=or_(lambda match: 'weak-episode' in match.tags, TitleBaseRule.is_ignored),
formatter=cleanup, seps=title_seps,
predicate=lambda match: match.value, index=0)
if hole:
hole.tags.append('filepart-title')
return hole

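The Filepart rules read the series title from the directory layout; a hedged sketch:

>>> guessit('Serie Name/Season 1/E01-Pilot.mkv')
# with an episode in the filename and a season in the middle part,
# Filepart3EpisodeTitle appends the top directory 'Serie Name' as title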
libs/guessit/rules/properties/episodes.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
episode, season, episode_count, season_count and episode_details properties
episode, season, disc, episode_count, season_count and episode_details properties
"""
import copy
from collections import defaultdict

@@ -11,24 +11,30 @@ from rebulk.match import Match
from rebulk.remodule import re
from rebulk.utils import is_iterable

from guessit.rules import match_processors
from guessit.rules.common.numeral import parse_numeral, numeral
from .title import TitleFromPosition
from ..common import dash, alt_dash, seps
from ..common import dash, alt_dash, seps, seps_no_fs
from ..common.formatters import strip
from ..common.numeral import numeral, parse_numeral
from ..common.validators import compose, seps_surround, seps_before, int_coercable
from ..common.pattern import is_disabled
from ..common.validators import seps_surround, int_coercable, and_
from ...reutils import build_or_pattern


def episodes():
def episodes(config):
"""
Builder for rebulk object.

:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""

# pylint: disable=too-many-branches,too-many-statements,too-many-locals
rebulk = Rebulk()
rebulk.regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'])
def is_season_episode_disabled(context):
"""Whether season and episode rules should be enabled."""
return is_disabled(context, 'episode') or is_disabled(context, 'season')

def episodes_season_chain_breaker(matches):
"""

@@ -39,16 +45,14 @@ def episodes():
:rtype:
"""
eps = matches.named('episode')
if len(eps) > 1 and abs(eps[-1].value - eps[-2].value) > 100:
if len(eps) > 1 and abs(eps[-1].value - eps[-2].value) > episode_max_range:
return True

seasons = matches.named('season')
if len(seasons) > 1 and abs(seasons[-1].value - seasons[-2].value) > 100:
if len(seasons) > 1 and abs(seasons[-1].value - seasons[-2].value) > season_max_range:
return True
return False

rebulk.chain_defaults(chain_breaker=episodes_season_chain_breaker)

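The hard-coded gap of 100 becomes configurable; a hedged illustration of what the chain breaker prevents:

>>> guessit('Show.E01-E999.mkv')
# with episode_max_range == 100 (the bundled default, an assumption
# here), the chain is broken rather than yielding the implausible
# range 1..999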
def season_episode_conflict_solver(match, other):
"""
Conflict solver for episode/season patterns

@@ -57,40 +61,25 @@ def episodes():
:param other:
:return:
"""
if match.name == 'episode' and other.name in \
['screen_size', 'video_codec', 'audio_codec', 'audio_channels', 'container', 'date', 'year'] \
and 'weak-audio_channels' not in other.tags:
return match
if match.name == 'season' and other.name in \
['screen_size', 'video_codec', 'audio_codec', 'audio_channels', 'container', 'date'] \
and 'weak-audio_channels' not in other.tags:
return match
if match.name in ['season', 'episode'] and other.name in ['season', 'episode'] \
and match.initiator != other.initiator:
if 'weak-episode' in match.tags or 'x' in match.initiator.raw.lower():
if match.name != other.name:
if match.name == 'episode' and other.name == 'year':
return match
if 'weak-episode' in other.tags or 'x' in other.initiator.raw.lower():
return other
if match.name in ('season', 'episode'):
if other.name in ('video_codec', 'audio_codec', 'container', 'date'):
return match
if (other.name == 'audio_channels' and 'weak-audio_channels' not in other.tags
and not match.initiator.children.named(match.name + 'Marker')) or (
other.name == 'screen_size' and not int_coercable(other.raw)):
return match
if other.name in ('season', 'episode') and match.initiator != other.initiator:
if (match.initiator.name in ('weak_episode', 'weak_duplicate')
and other.initiator.name in ('weak_episode', 'weak_duplicate')):
return '__default__'
for current in (match, other):
if 'weak-episode' in current.tags or 'x' in current.initiator.raw.lower():
return current
return '__default__'

season_episode_seps = []
season_episode_seps.extend(seps)
season_episode_seps.extend(['x', 'X', 'e', 'E'])

season_words = ['season', 'saison', 'seizoen', 'serie', 'seasons', 'saisons', 'series',
'tem', 'temp', 'temporada', 'temporadas', 'stagione']
episode_words = ['episode', 'episodes', 'eps', 'ep', 'episodio',
'episodios', 'capitulo', 'capitulos']
of_words = ['of', 'sur']
all_words = ['All']
season_markers = ["S"]
season_ep_markers = ["x"]
episode_markers = ["xE", "Ex", "EP", "E", "x"]
range_separators = ['-', '~', 'to', 'a']
weak_discrete_separators = list(sep for sep in seps if sep not in range_separators)
strong_discrete_separators = ['+', '&', 'and', 'et']
discrete_separators = strong_discrete_separators + weak_discrete_separators

def ordering_validator(match):
"""
Validator for season list. They should be in natural order to be validated.

@@ -124,64 +113,18 @@ def episodes():
lambda m: m.name == property_name + 'Separator')
separator = match.children.previous(current_match,
lambda m: m.name == property_name + 'Separator', 0)
if separator.raw not in range_separators and separator.raw in weak_discrete_separators:
if not current_match.value - previous_match.value == 1:
valid = False
if separator.raw in strong_discrete_separators:
valid = True
break
if separator:
if separator.raw not in range_separators and separator.raw in weak_discrete_separators:
if not 0 < current_match.value - previous_match.value <= max_range_gap + 1:
valid = False
if separator.raw in strong_discrete_separators:
valid = True
break
previous_match = current_match
return valid

return is_consecutive('episode') and is_consecutive('season')

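A hedged illustration of what ordering_validator accepts:

>>> guessit('Show.S01E01E02E03.mkv')   # consecutive values validate
>>> guessit('Show.S01E05E02.mkv')      # out-of-order with a weak separator is rejected
# a strong separator such as '+' (e.g. 'S01E01+E05') validates regardless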
# S01E02, 01x02, S01S02S03
rebulk.chain(formatter={'season': int, 'episode': int},
tags=['SxxExx'],
abbreviations=[alt_dash],
children=True,
private_parent=True,
validate_all=True,
validator={'__parent__': ordering_validator},
conflict_solver=season_episode_conflict_solver) \
.regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)@?' +
build_or_pattern(episode_markers, name='episodeMarker') + r'@?(?P<episode>\d+)',
validate_all=True,
validator={'__parent__': seps_before}).repeater('+') \
.regex(build_or_pattern(episode_markers + discrete_separators + range_separators,
name='episodeSeparator',
escape=True) +
r'(?P<episode>\d+)').repeater('*') \
.chain() \
.regex(r'(?P<season>\d+)@?' +
build_or_pattern(season_ep_markers, name='episodeMarker') +
r'@?(?P<episode>\d+)',
validate_all=True,
validator={'__parent__': seps_before}) \
.chain() \
.regex(r'(?P<season>\d+)@?' +
build_or_pattern(season_ep_markers, name='episodeMarker') +
r'@?(?P<episode>\d+)',
validate_all=True,
validator={'__parent__': seps_before}) \
.regex(build_or_pattern(season_ep_markers + discrete_separators + range_separators,
name='episodeSeparator',
escape=True) +
r'(?P<episode>\d+)').repeater('*') \
.chain() \
.regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)',
validate_all=True,
validator={'__parent__': seps_before}) \
.regex(build_or_pattern(season_markers + discrete_separators + range_separators,
name='seasonSeparator',
escape=True) +
r'(?P<season>\d+)').repeater('*')

# episode_details property
for episode_detail in ('Special', 'Bonus', 'Omake', 'Ova', 'Oav', 'Pilot', 'Unaired'):
rebulk.string(episode_detail, value=episode_detail, name='episode_details')
rebulk.regex(r'Extras?', name='episode_details', value='Extras')

def validate_roman(match):
"""
Validate a roman match if surrounded by separators

@@ -194,110 +137,204 @@ def episodes():
return True
return seps_surround(match)

season_words = config['season_words']
episode_words = config['episode_words']
of_words = config['of_words']
all_words = config['all_words']
season_markers = config['season_markers']
season_ep_markers = config['season_ep_markers']
disc_markers = config['disc_markers']
episode_markers = config['episode_markers']
range_separators = config['range_separators']
weak_discrete_separators = list(sep for sep in seps_no_fs if sep not in range_separators)
strong_discrete_separators = config['discrete_separators']
discrete_separators = strong_discrete_separators + weak_discrete_separators
episode_max_range = config['episode_max_range']
season_max_range = config['season_max_range']
max_range_gap = config['max_range_gap']

rebulk = Rebulk() \
.regex_defaults(flags=re.IGNORECASE) \
.string_defaults(ignore_case=True) \
.chain_defaults(chain_breaker=episodes_season_chain_breaker) \
.defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'],
formatter={'season': int, 'episode': int, 'version': int, 'count': int},
children=True,
private_parent=True,
conflict_solver=season_episode_conflict_solver,
abbreviations=[alt_dash])

# S01E02, 01x02, S01S02S03
rebulk.chain(
tags=['SxxExx'],
validate_all=True,
validator={'__parent__': and_(seps_surround, ordering_validator)},
disabled=is_season_episode_disabled) \
.defaults(tags=['SxxExx']) \
.regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)@?' +
build_or_pattern(episode_markers + disc_markers, name='episodeMarker') + r'@?(?P<episode>\d+)')\
.repeater('+') \
.regex(build_or_pattern(episode_markers + disc_markers + discrete_separators + range_separators,
name='episodeSeparator',
escape=True) +
r'(?P<episode>\d+)').repeater('*')

rebulk.chain(tags=['SxxExx'],
validate_all=True,
validator={'__parent__': and_(seps_surround, ordering_validator)},
disabled=is_season_episode_disabled) \
.defaults(tags=['SxxExx']) \
.regex(r'(?P<season>\d+)@?' +
build_or_pattern(season_ep_markers, name='episodeMarker') +
r'@?(?P<episode>\d+)').repeater('+') \

rebulk.chain(tags=['SxxExx'],
validate_all=True,
validator={'__parent__': and_(seps_surround, ordering_validator)},
disabled=is_season_episode_disabled) \
.defaults(tags=['SxxExx']) \
.regex(r'(?P<season>\d+)@?' +
build_or_pattern(season_ep_markers, name='episodeMarker') +
r'@?(?P<episode>\d+)') \
.regex(build_or_pattern(season_ep_markers + discrete_separators + range_separators,
name='episodeSeparator',
escape=True) +
r'(?P<episode>\d+)').repeater('*')

rebulk.chain(tags=['SxxExx'],
validate_all=True,
validator={'__parent__': and_(seps_surround, ordering_validator)},
disabled=is_season_episode_disabled) \
.defaults(tags=['SxxExx']) \
.regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)') \
.regex('(?P<other>Extras)', name='other', value='Extras', tags=['no-release-group-prefix']).repeater('?') \
.regex(build_or_pattern(season_markers + discrete_separators + range_separators,
name='seasonSeparator',
escape=True) +
r'(?P<season>\d+)').repeater('*')
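A hedged example of the canonical pattern, including the new disc markers:

>>> guessit('Show.S02E05.720p.mkv')
# season == 2, episode == 5 via the seasonMarker/episodeMarker chain
>>> guessit('Show.S01D01.BluRay.iso')
# assuming 'D' is listed in the bundled disc_markers config,
# RenameToDiscMatch later renames the captured number to disc == 1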
# episode_details property
for episode_detail in ('Special', 'Pilot', 'Unaired', 'Final'):
rebulk.string(episode_detail,
private_parent=False,
children=False,
value=episode_detail,
name='episode_details',
disabled=lambda context: is_disabled(context, 'episode_details'))

rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'],
validate_all=True, validator={'__parent__': seps_surround}, children=True, private_parent=True,
validate_all=True,
validator={'__parent__': and_(seps_surround, ordering_validator)},
children=True,
private_parent=True,
conflict_solver=season_episode_conflict_solver)

rebulk.chain(abbreviations=[alt_dash],
rebulk.chain(validate_all=True,
conflict_solver=season_episode_conflict_solver,
formatter={'season': parse_numeral, 'count': parse_numeral},
validator={'__parent__': compose(seps_surround, ordering_validator),
validator={'__parent__': and_(seps_surround, ordering_validator),
'season': validate_roman,
'count': validate_roman}) \
.defaults(validator=None) \
'count': validate_roman},
disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'season')) \
.defaults(formatter={'season': parse_numeral, 'count': parse_numeral},
validator={'season': validate_roman, 'count': validate_roman},
conflict_solver=season_episode_conflict_solver) \
.regex(build_or_pattern(season_words, name='seasonMarker') + '@?(?P<season>' + numeral + ')') \
.regex(r'' + build_or_pattern(of_words) + '@?(?P<count>' + numeral + ')').repeater('?') \
.regex(r'@?' + build_or_pattern(range_separators + discrete_separators + ['@'],
name='seasonSeparator', escape=True) +
r'@?(?P<season>\d+)').repeater('*')

rebulk.defaults(abbreviations=[dash])

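Season words accept roman and word numerals through parse_numeral (hedged example):

>>> guessit('Show.Season.Two.720p.mkv')
# 'Two' is parsed by parse_numeral, giving season == 2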
rebulk.regex(build_or_pattern(episode_words, name='episodeMarker') + r'-?(?P<episode>\d+)' +
r'(?:v(?P<version>\d+))?' +
r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?',  # Episode 4
abbreviations=[dash], formatter={'episode': int, 'version': int, 'count': int},
disabled=lambda context: context.get('type') == 'episode')
disabled=lambda context: context.get('type') == 'episode' or is_disabled(context, 'episode'))

rebulk.regex(build_or_pattern(episode_words, name='episodeMarker') + r'-?(?P<episode>' + numeral + ')' +
r'(?:v(?P<version>\d+))?' +
r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?',  # Episode 4
abbreviations=[dash],
validator={'episode': validate_roman},
formatter={'episode': parse_numeral, 'version': int, 'count': int},
disabled=lambda context: context.get('type') != 'episode')
formatter={'episode': parse_numeral},
disabled=lambda context: context.get('type') != 'episode' or is_disabled(context, 'episode'))

rebulk.regex(r'S?(?P<season>\d+)-?(?:xE|Ex|E|x)-?(?P<other>' + build_or_pattern(all_words) + ')',
tags=['SxxExx'],
abbreviations=[dash],
validator=None,
formatter={'season': int, 'other': lambda match: 'Complete'})
formatter={'other': lambda match: 'Complete'},
disabled=lambda context: is_disabled(context, 'season'))

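A hedged example of the episode-word patterns:

>>> guessit('Show.Episode.4.of.12.mkv')
# episode == 4; the 'of 12' count is later checked by CountValidator,
# which renames a validated count to episode_count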
# 12, 13
rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode'], formatter={'episode': int, 'version': int},
disabled=lambda context: context.get('type') == 'movie') \
.defaults(validator=None) \
rebulk.chain(tags=['weak-episode'],
disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
.defaults(validator=None, tags=['weak-episode']) \
.regex(r'(?P<episode>\d{2})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{2})').repeater('*')
.regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{2})', abbreviations=None).repeater('*')

# 012, 013
rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode'], formatter={'episode': int, 'version': int},
disabled=lambda context: context.get('type') == 'movie') \
.defaults(validator=None) \
rebulk.chain(tags=['weak-episode'],
disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
.defaults(validator=None, tags=['weak-episode']) \
.regex(r'0(?P<episode>\d{1,2})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>[x-])0(?P<episode>\d{1,2})').repeater('*')
.regex(r'(?P<episodeSeparator>[x-])0(?P<episode>\d{1,2})', abbreviations=None).repeater('*')

# 112, 113
rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode'], formatter={'episode': int, 'version': int},
disabled=lambda context: (not context.get('episode_prefer_number', False) or
context.get('type') == 'movie')) \
.defaults(validator=None) \
rebulk.chain(tags=['weak-episode'],
name='weak_episode',
disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
.defaults(validator=None, tags=['weak-episode'], name='weak_episode') \
.regex(r'(?P<episode>\d{3,4})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{3,4})').repeater('*')
.regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{3,4})', abbreviations=None).repeater('*')

# 1, 2, 3
rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode'], formatter={'episode': int, 'version': int},
disabled=lambda context: context.get('type') != 'episode') \
.defaults(validator=None) \
rebulk.chain(tags=['weak-episode'],
disabled=lambda context: context.get('type') != 'episode' or is_disabled(context, 'episode')) \
.defaults(validator=None, tags=['weak-episode']) \
.regex(r'(?P<episode>\d)') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{1,2})').repeater('*')
.regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{1,2})', abbreviations=None).repeater('*')

# e112, e113
# TODO: Enhance rebulk for validator to be used globally (season_episode_validator)
rebulk.chain(formatter={'episode': int, 'version': int}) \
# e112, e113, 1e18, 3e19
rebulk.chain(disabled=lambda context: is_disabled(context, 'episode')) \
.defaults(validator=None) \
.regex(r'(?P<episodeMarker>e)(?P<episode>\d{1,4})') \
.regex(r'(?P<season>\d{1,2})?(?P<episodeMarker>e)(?P<episode>\d{1,4})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('*')
.regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})', abbreviations=None).repeater('*')

# ep 112, ep113, ep112, ep113
rebulk.chain(abbreviations=[dash], formatter={'episode': int, 'version': int}) \
rebulk.chain(disabled=lambda context: is_disabled(context, 'episode')) \
.defaults(validator=None) \
.regex(r'ep-?(?P<episode>\d{1,4})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>ep|e|x|-)(?P<episode>\d{1,4})').repeater('*')
.regex(r'(?P<episodeSeparator>ep|e|x|-)(?P<episode>\d{1,4})', abbreviations=None).repeater('*')

# cap 112, cap 112_114
rebulk.chain(abbreviations=[dash],
tags=['see-pattern'],
formatter={'season': int, 'episode': int}) \
.defaults(validator=None) \
rebulk.chain(tags=['see-pattern'],
disabled=is_season_episode_disabled) \
.defaults(validator=None, tags=['see-pattern']) \
.regex(r'(?P<seasonMarker>cap)-?(?P<season>\d{1,2})(?P<episode>\d{2})') \
.regex(r'(?P<episodeSeparator>-)(?P<season>\d{1,2})(?P<episode>\d{2})').repeater('?')

# 102, 0102
rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode', 'weak-duplicate'],
formatter={'season': int, 'episode': int, 'version': int},
conflict_solver=lambda match, other: match if other.name == 'year' else '__default__',
rebulk.chain(tags=['weak-episode', 'weak-duplicate'],
name='weak_duplicate',
conflict_solver=season_episode_conflict_solver,
disabled=lambda context: (context.get('episode_prefer_number', False) or
context.get('type') == 'movie')) \
.defaults(validator=None) \
context.get('type') == 'movie') or is_season_episode_disabled(context)) \
.defaults(tags=['weak-episode', 'weak-duplicate'],
name='weak_duplicate',
validator=None,
conflict_solver=season_episode_conflict_solver) \
.regex(r'(?P<season>\d{1,2})(?P<episode>\d{2})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>x|-)(?P<episode>\d{2})').repeater('*')
.regex(r'(?P<episodeSeparator>x|-)(?P<episode>\d{2})', abbreviations=None).repeater('*')

rebulk.regex(r'v(?P<version>\d+)', children=True, private_parent=True, formatter=int)
rebulk.regex(r'v(?P<version>\d+)',
formatter=int,
disabled=lambda context: is_disabled(context, 'version'))

rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'])

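A hedged example of the weak patterns defined above:

>>> guessit('Show.102.720p.mkv')
# '102' matches the weak_duplicate chain as season == 1, episode == 2;
# whether it is kept is decided later by WeakConflictSolver and the
# RemoveWeak* rules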
@@ -305,19 +342,107 @@ def episodes():
# detached of X count (season/episode)
rebulk.regex(r'(?P<episode>\d+)-?' + build_or_pattern(of_words) +
r'-?(?P<count>\d+)-?' + build_or_pattern(episode_words) + '?',
abbreviations=[dash], children=True, private_parent=True, formatter=int)
formatter=int,
pre_match_processor=match_processors.strip,
disabled=lambda context: is_disabled(context, 'episode'))

rebulk.regex(r'Minisodes?', name='episode_format', value="Minisode")
rebulk.regex(r'Minisodes?',
children=False,
private_parent=False,
name='episode_format',
value="Minisode",
disabled=lambda context: is_disabled(context, 'episode_format'))

rebulk.rules(RemoveInvalidSeason, RemoveInvalidEpisode,
SeePatternRange(range_separators + ['_']), EpisodeNumberSeparatorRange(range_separators),
SeasonSeparatorRange(range_separators), RemoveWeakIfMovie, RemoveWeakIfSxxExx,
RemoveWeakDuplicate, EpisodeDetailValidator, RemoveDetachedEpisodeNumber, VersionValidator,
CountValidator, EpisodeSingleDigitValidator)
rebulk.rules(WeakConflictSolver, RemoveInvalidSeason, RemoveInvalidEpisode,
SeePatternRange(range_separators + ['_']),
EpisodeNumberSeparatorRange(range_separators),
SeasonSeparatorRange(range_separators), RemoveWeakIfMovie, RemoveWeakIfSxxExx, RemoveWeakDuplicate,
EpisodeDetailValidator, RemoveDetachedEpisodeNumber, VersionValidator, RemoveWeak(episode_words),
RenameToAbsoluteEpisode, CountValidator, EpisodeSingleDigitValidator, RenameToDiscMatch)

return rebulk

class WeakConflictSolver(Rule):
|
||||
"""
|
||||
Rule to decide whether weak-episode or weak-duplicate matches should be kept.
|
||||
|
||||
If an anime is detected:
|
||||
- weak-duplicate matches should be removed
|
||||
- weak-episode matches should be tagged as anime
|
||||
Otherwise:
|
||||
- weak-episode matches are removed unless they're part of an episode range match.
|
||||
"""
|
||||
priority = 128
|
||||
consequence = [RemoveMatch, AppendMatch]
|
||||
|
||||
def enabled(self, context):
|
||||
return context.get('type') != 'movie'
|
||||
|
||||
@classmethod
|
||||
def is_anime(cls, matches):
|
||||
"""Return True if it seems to be an anime.
|
||||
|
||||
Anime characteristics:
|
||||
- version, crc32 matches
|
||||
- screen_size inside brackets
|
||||
- release_group at start and inside brackets
|
||||
"""
|
||||
if matches.named('version') or matches.named('crc32'):
|
||||
return True
|
||||
|
||||
for group in matches.markers.named('group'):
|
||||
if matches.range(group.start, group.end, predicate=lambda m: m.name == 'screen_size'):
|
||||
return True
|
||||
if matches.markers.starting(group.start, predicate=lambda m: m.name == 'path'):
|
||||
hole = matches.holes(group.start, group.end, index=0)
|
||||
if hole and hole.raw == group.raw:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def when(self, matches, context):
|
||||
to_remove = []
|
||||
to_append = []
|
||||
anime_detected = self.is_anime(matches)
|
||||
for filepart in matches.markers.named('path'):
|
||||
weak_matches = matches.range(filepart.start, filepart.end, predicate=(
|
||||
lambda m: m.initiator.name == 'weak_episode'))
|
||||
weak_dup_matches = matches.range(filepart.start, filepart.end, predicate=(
|
||||
lambda m: m.initiator.name == 'weak_duplicate'))
|
||||
if anime_detected:
|
||||
if weak_matches:
|
||||
to_remove.extend(weak_dup_matches)
|
||||
for match in matches.range(filepart.start, filepart.end, predicate=(
|
||||
lambda m: m.name == 'episode' and m.initiator.name != 'weak_duplicate')):
|
||||
episode = copy.copy(match)
|
||||
episode.tags = episode.tags + ['anime']
|
||||
to_append.append(episode)
|
||||
to_remove.append(match)
|
||||
elif weak_dup_matches:
|
||||
episodes_in_range = matches.range(filepart.start, filepart.end, predicate=(
|
||||
lambda m:
|
||||
m.name == 'episode' and m.initiator.name == 'weak_episode'
|
||||
and m.initiator.children.named('episodeSeparator')
|
||||
))
|
||||
if not episodes_in_range and not matches.range(filepart.start, filepart.end,
|
||||
predicate=lambda m: 'SxxExx' in m.tags):
|
||||
to_remove.extend(weak_matches)
|
||||
else:
|
||||
for match in episodes_in_range:
|
||||
episode = copy.copy(match)
|
||||
episode.tags = []
|
||||
to_append.append(episode)
|
||||
to_remove.append(match)
|
||||
|
||||
if to_append:
|
||||
to_remove.extend(weak_dup_matches)
|
||||
|
||||
if to_remove or to_append:
|
||||
return to_remove, to_append
|
||||
return False
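For illustration, a rough sketch of the behaviour this rule targets on an anime-style name (the bracketed group and crc32 satisfy is_anime above; the exact output is assumed, not verified against this build):

    from guessit import guessit

    # crc32 plus a bracketed release group mark the file as anime, so the bare
    # '01' survives as an episode match tagged 'anime' instead of being dropped
    guess = guessit('[SubGroup] Anime Title - 01 [720p] [ABCD1234].mkv')
    # expected: guess['episode'] == 1 and guess['crc32'] == 'ABCD1234'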


class CountValidator(Rule):
"""
Validate count property and rename it

@@ -341,7 +466,9 @@ class CountValidator(Rule):

season_count.append(count)
else:
to_remove.append(count)
return to_remove, episode_count, season_count
if to_remove or episode_count or season_count:
return to_remove, episode_count, season_count
return False


class SeePatternRange(Rule):

@@ -376,7 +503,9 @@ class SeePatternRange(Rule):

to_remove.append(separator)

return to_remove, to_append
if to_remove or to_append:
return to_remove, to_append
return False


class AbstractSeparatorRange(Rule):

@@ -396,14 +525,16 @@ class AbstractSeparatorRange(Rule):

to_append = []

for separator in matches.named(self.property_name + 'Separator'):
previous_match = matches.previous(separator, lambda match: match.name == self.property_name, 0)
next_match = matches.next(separator, lambda match: match.name == self.property_name, 0)
previous_match = matches.previous(separator, lambda m: m.name == self.property_name, 0)
next_match = matches.next(separator, lambda m: m.name == self.property_name, 0)
initiator = separator.initiator

if previous_match and next_match and separator.value in self.range_separators:
to_remove.append(next_match)
for episode_number in range(previous_match.value + 1, next_match.value):
match = copy.copy(next_match)
match.value = episode_number
initiator.children.append(match)
to_append.append(match)
to_append.append(next_match)
to_remove.append(separator)

@@ -415,9 +546,11 @@ class AbstractSeparatorRange(Rule):

if separator not in self.range_separators:
separator = strip(separator)
if separator in self.range_separators:
initiator = previous_match.initiator
for episode_number in range(previous_match.value + 1, next_match.value):
match = copy.copy(next_match)
match.value = episode_number
initiator.children.append(match)
to_append.append(match)
to_append.append(Match(previous_match.end, next_match.start - 1,
name=self.property_name + 'Separator',

@@ -428,15 +561,51 @@ class AbstractSeparatorRange(Rule):

previous_match = next_match

return to_remove, to_append
if to_remove or to_append:
return to_remove, to_append
return False


class RenameToAbsoluteEpisode(Rule):
"""
Rename episode to absolute_episode.

Absolute episodes are only used if two groups of episodes are detected:
S02E04-06 25-27
25-27 S02E04-06
2x04-06 25-27
28. Anime Name S02E05
The matches in the group with higher episode values are renamed to absolute_episode.
"""

consequence = RenameMatch('absolute_episode')

def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
initiators = {match.initiator for match in matches.named('episode')
if len(match.initiator.children.named('episode')) > 1}
if len(initiators) != 2:
ret = []
for filepart in matches.markers.named('path'):
if matches.range(filepart.start + 1, filepart.end, predicate=lambda m: m.name == 'episode'):
ret.extend(
matches.starting(filepart.start, predicate=lambda m: m.initiator.name == 'weak_episode'))
return ret

initiators = sorted(initiators, key=lambda item: item.end)
if not matches.holes(initiators[0].end, initiators[1].start, predicate=lambda m: m.raw.strip(seps)):
first_range = matches.named('episode', predicate=lambda m: m.initiator == initiators[0])
second_range = matches.named('episode', predicate=lambda m: m.initiator == initiators[1])
if len(first_range) == len(second_range):
if second_range[0].value > first_range[0].value:
return second_range
if first_range[0].value > second_range[0].value:
return first_range
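A minimal sketch of the renaming described in the docstring above, reusing its first example (expected values assumed from that docstring):

    from guessit import guessit

    # two adjacent episode ranges of equal length: the higher-valued group
    # is renamed from episode to absolute_episode
    guess = guessit('Anime.Name.S02E04-06.25-27.mkv')
    # expected: guess['episode'] == [4, 5, 6], guess['absolute_episode'] == [25, 26, 27]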


class EpisodeNumberSeparatorRange(AbstractSeparatorRange):
"""
Remove separator matches and create matches for episode number range.
"""
priority = 128
consequence = [RemoveMatch, AppendMatch]

def __init__(self, range_separators):
super(EpisodeNumberSeparatorRange, self).__init__(range_separators, "episode")

@@ -446,8 +615,6 @@ class SeasonSeparatorRange(AbstractSeparatorRange):

"""
Remove separator matches and create matches for season range.
"""
priority = 128
consequence = [RemoveMatch, AppendMatch]

def __init__(self, range_separators):
super(SeasonSeparatorRange, self).__init__(range_separators, "season")

@@ -455,7 +622,7 @@ class SeasonSeparatorRange(AbstractSeparatorRange):


class RemoveWeakIfMovie(Rule):
"""
Remove weak-movie tagged matches if it seems to be a movie.
Remove weak-episode tagged matches if it seems to be a movie.
"""
priority = 64
consequence = RemoveMatch

@@ -471,19 +638,69 @@ class RemoveWeakIfMovie(Rule):

year = matches.range(filepart.start, filepart.end, predicate=lambda m: m.name == 'year', index=0)
if year:
remove = True
next_match = matches.next(year, predicate=lambda m, fp=filepart: m.private and m.end <= fp.end, index=0)
if next_match and not matches.at_match(next_match, predicate=lambda m: m.name == 'year'):
next_match = matches.range(year.end, filepart.end, predicate=lambda m: m.private, index=0)
if (next_match and not matches.holes(year.end, next_match.start, predicate=lambda m: m.raw.strip(seps))
and not matches.at_match(next_match, predicate=lambda m: m.name == 'year')):
to_ignore.add(next_match.initiator)

to_ignore.update(matches.range(filepart.start, filepart.end,
predicate=lambda m: len(m.children.named('episode')) > 1))

to_remove.extend(matches.conflicting(year))
if remove:
to_remove.extend(matches.tagged('weak-movie', predicate=lambda m: m.initiator not in to_ignore))
to_remove.extend(matches.tagged('weak-episode', predicate=(
lambda m: m.initiator not in to_ignore and 'anime' not in m.tags)))

return to_remove


class RemoveWeak(Rule):
"""
Remove weak-episode matches which appear after video, source, and audio matches.
"""
priority = 16
consequence = RemoveMatch, AppendMatch

def __init__(self, episode_words):
super(RemoveWeak, self).__init__()
self.episode_words = episode_words

def when(self, matches, context):
to_remove = []
to_append = []
for filepart in matches.markers.named('path'):
weaks = matches.range(filepart.start, filepart.end, predicate=lambda m: 'weak-episode' in m.tags)
if weaks:
weak = weaks[0]
previous = matches.previous(weak, predicate=lambda m: m.name in (
'audio_codec', 'screen_size', 'streaming_service', 'source', 'video_profile',
'audio_channels', 'audio_profile'), index=0)
if previous and not matches.holes(
previous.end, weak.start, predicate=lambda m: m.raw.strip(seps)):
if previous.raw.lower() in self.episode_words:
try:
episode = copy.copy(weak)
episode.name = 'episode'
episode.value = int(weak.value)
episode.start = previous.start
episode.private = False
episode.tags = []

to_append.append(episode)
except ValueError:
pass

to_remove.extend(weaks)
if to_remove or to_append:
return to_remove, to_append
return False


class RemoveWeakIfSxxExx(Rule):
"""
Remove weak-movie tagged matches if SxxExx pattern is matched.
Remove weak-episode tagged matches if SxxExx pattern is matched.

Weak episodes at the beginning of a filepart are kept.
"""
priority = 64
consequence = RemoveMatch

@@ -492,9 +709,10 @@ class RemoveWeakIfSxxExx(Rule):

to_remove = []
for filepart in matches.markers.named('path'):
if matches.range(filepart.start, filepart.end,
predicate=lambda match: not match.private and 'SxxExx' in match.tags):
to_remove.extend(matches.range(
filepart.start, filepart.end, predicate=lambda match: 'weak-movie' in match.tags))
predicate=lambda m: not m.private and 'SxxExx' in m.tags):
for match in matches.range(filepart.start, filepart.end, predicate=lambda m: 'weak-episode' in m.tags):
if match.start != filepart.start or match.initiator.name != 'weak_episode':
to_remove.append(match)
return to_remove

@@ -575,7 +793,7 @@ class RemoveWeakDuplicate(Rule):

for filepart in matches.markers.named('path'):
patterns = defaultdict(list)
for match in reversed(matches.range(filepart.start, filepart.end,
predicate=lambda match: 'weak-duplicate' in match.tags)):
predicate=lambda m: 'weak-duplicate' in m.tags)):
if match.pattern in patterns[match.name]:
to_remove.append(match)
else:

@@ -615,15 +833,15 @@ class RemoveDetachedEpisodeNumber(Rule):

episode_numbers = []
episode_values = set()
for match in matches.named('episode', lambda match: not match.private and 'weak-movie' in match.tags):
for match in matches.named('episode', lambda m: not m.private and 'weak-episode' in m.tags):
if match.value not in episode_values:
episode_numbers.append(match)
episode_values.add(match.value)

episode_numbers = list(sorted(episode_numbers, key=lambda match: match.value))
episode_numbers = list(sorted(episode_numbers, key=lambda m: m.value))
if len(episode_numbers) > 1 and \
episode_numbers[0].value < 10 and \
episode_numbers[1].value - episode_numbers[0].value != 1:
episode_numbers[0].value < 10 and \
episode_numbers[1].value - episode_numbers[0].value != 1:
parent = episode_numbers[0]
while parent:  # TODO: Add a feature in rebulk to avoid this ...
ret.append(parent)

@@ -664,3 +882,31 @@ class EpisodeSingleDigitValidator(Rule):

if not matches.range(*group.span, predicate=lambda match: match.name == 'title'):
ret.append(episode)
return ret


class RenameToDiscMatch(Rule):
"""
Rename episodes detected with `d` episodeMarkers to `disc`.
"""

consequence = [RenameMatch('disc'), RenameMatch('discMarker'), RemoveMatch]

def when(self, matches, context):
discs = []
markers = []
to_remove = []

disc_disabled = is_disabled(context, 'disc')

for marker in matches.named('episodeMarker', predicate=lambda m: m.value.lower() == 'd'):
if disc_disabled:
to_remove.append(marker)
to_remove.extend(marker.initiator.children)
continue

markers.append(marker)
discs.extend(sorted(marker.initiator.children.named('episode'), key=lambda m: m.value))

if discs or markers or to_remove:
return discs, markers, to_remove
return False
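A minimal sketch of the disc renaming (the 'd' episodeMarker comes from the SxxDxx-style season/episode patterns; the exact output is assumed, not verified against this build):

    from guessit import guessit

    # 'D01' is first matched with a 'd' episodeMarker, then renamed to disc
    guess = guessit('Series.Name.S02D01.PAL.DVD.mkv')
    # expected: guess['season'] == 2 and guess['disc'] == 1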


@@ -7,10 +7,11 @@ from rebulk import Rebulk, AppendMatch, Rule

from rebulk.remodule import re

from ..common.formatters import cleanup
from ..common.pattern import is_disabled
from ..common.validators import seps_surround


def film():
def film(config):  # pylint:disable=unused-argument
"""
Builder for rebulk object.
:return: Created Rebulk object

@@ -18,7 +19,8 @@ def film():

"""
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, validate_all=True, validator={'__parent__': seps_surround})

rebulk.regex(r'f(\d{1,2})', name='film', private_parent=True, children=True, formatter=int)
rebulk.regex(r'f(\d{1,2})', name='film', private_parent=True, children=True, formatter=int,
disabled=lambda context: is_disabled(context, 'film'))

rebulk.rules(FilmTitleRule)

@@ -33,7 +35,10 @@ class FilmTitleRule(Rule):

properties = {'film_title': [None]}

def when(self, matches, context):
def enabled(self, context):
return not is_disabled(context, 'film_title')

def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
bonus_number = matches.named('film', lambda match: not match.private, index=0)
if bonus_number:
filepath = matches.markers.at_match(bonus_number, lambda marker: marker.name == 'path', 0)


@@ -1,72 +0,0 @@

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
format property
"""
from rebulk.remodule import re

from rebulk import Rebulk, RemoveMatch, Rule
from ..common import dash
from ..common.validators import seps_before, seps_after


def format_():
"""
Builder for rebulk object.
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
rebulk.defaults(name="format", tags=['video-codec-prefix', 'streaming_service.suffix'])

rebulk.regex("VHS", "VHS-?Rip", value="VHS")
rebulk.regex("CAM", "CAM-?Rip", "HD-?CAM", value="Cam")
rebulk.regex("TELESYNC", "TS", "HD-?TS", value="Telesync")
rebulk.regex("WORKPRINT", "WP", value="Workprint")
rebulk.regex("TELECINE", "TC", value="Telecine")
rebulk.regex("PPV", "PPV-?Rip", value="PPV")  # Pay Per View
rebulk.regex("SD-?TV", "SD-?TV-?Rip", "Rip-?SD-?TV", "TV-?Rip",
"Rip-?TV", "TV-?(?=Dub)", value="TV")  # TV is too common to allow matching
rebulk.regex("DVB-?Rip", "DVB", "PD-?TV", value="DVB")
rebulk.regex("DVD", "DVD-?Rip", "VIDEO-?TS", "DVD-?R(?:$|(?!E))",  # "DVD-?R(?:$|^E)" => DVD-Real ...
"DVD-?9", "DVD-?5", value="DVD")

rebulk.regex("HD-?TV", "TV-?RIP-?HD", "HD-?TV-?RIP", "HD-?RIP", value="HDTV",
conflict_solver=lambda match, other: other if other.name == 'other' else '__default__')
rebulk.regex("VOD", "VOD-?Rip", value="VOD")
rebulk.regex("WEB-?Rip", "WEB-?DL-?Rip", "WEB-?Cap", value="WEBRip")
rebulk.regex("WEB-?DL", "WEB-?HD", "WEB", "DL-?WEB", "DL(?=-?Mux)", value="WEB-DL")
rebulk.regex("HD-?DVD-?Rip", "HD-?DVD", value="HD-DVD")
rebulk.regex("Blu-?ray(?:-?Rip)?", "B[DR]", "B[DR]-?Rip", "BD[59]", "BD25", "BD50", value="BluRay")
rebulk.regex("AHDTV", value="AHDTV")
rebulk.regex('UHD-?TV', 'UHD-?Rip', value='UHDTV',
conflict_solver=lambda match, other: other if other.name == 'other' else '__default__')
rebulk.regex("HDTC", value="HDTC")
rebulk.regex("DSR", "DSR?-?Rip", "SAT-?Rip", "DTH", "DTH-?Rip", value="SATRip")

rebulk.rules(ValidateFormat)

return rebulk


class ValidateFormat(Rule):
"""
Validate format with screener property, with video_codec property or separated
"""
priority = 64
consequence = RemoveMatch

def when(self, matches, context):
ret = []
for format_match in matches.named('format'):
if not seps_before(format_match) and \
not matches.range(format_match.start - 1, format_match.start - 2,
lambda match: 'format-prefix' in match.tags):
ret.append(format_match)
continue
if not seps_after(format_match) and \
not matches.range(format_match.end, format_match.end + 1,
lambda match: 'format-suffix' in match.tags):
ret.append(format_match)
continue
return ret
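The whole format property file is deleted here: GuessIt 3 renames the property to source and humanizes several of its values. A rough before/after sketch (the 3.x value 'Blu-ray' is assumed from the new source rules, not verified against this build):

    from guessit import guessit

    guess = guessit('Movie.Title.2018.1080p.BluRay.x264.mkv')
    # GuessIt 2.x reported: {'format': 'BluRay', ...}
    # GuessIt 3.x (assumed): {'source': 'Blu-ray', ...}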

@@ -11,55 +11,82 @@ import babelfish

from rebulk import Rebulk, Rule, RemoveMatch, RenameMatch
from rebulk.remodule import re

from ..common.words import iter_words, COMMON_WORDS
from ..common import seps
from ..common.pattern import is_disabled
from ..common.words import iter_words
from ..common.validators import seps_surround


def language():
def language(config, common_words):
"""
Builder for rebulk object.

:param config: rule configuration
:type config: dict
:param common_words: common words
:type common_words: set
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk()
subtitle_both = config['subtitle_affixes']
subtitle_prefixes = sorted(subtitle_both + config['subtitle_prefixes'], key=length_comparator)
subtitle_suffixes = sorted(subtitle_both + config['subtitle_suffixes'], key=length_comparator)
lang_both = config['language_affixes']
lang_prefixes = sorted(lang_both + config['language_prefixes'], key=length_comparator)
lang_suffixes = sorted(lang_both + config['language_suffixes'], key=length_comparator)
weak_affixes = frozenset(config['weak_affixes'])

rebulk = Rebulk(disabled=lambda context: (is_disabled(context, 'language') and
is_disabled(context, 'subtitle_language')))

rebulk.string(*subtitle_prefixes, name="subtitle_language.prefix", ignore_case=True, private=True,
validator=seps_surround, tags=['release-group-prefix'])
validator=seps_surround, tags=['release-group-prefix'],
disabled=lambda context: is_disabled(context, 'subtitle_language'))
rebulk.string(*subtitle_suffixes, name="subtitle_language.suffix", ignore_case=True, private=True,
validator=seps_surround)
validator=seps_surround,
disabled=lambda context: is_disabled(context, 'subtitle_language'))
rebulk.string(*lang_suffixes, name="language.suffix", ignore_case=True, private=True,
validator=seps_surround, tags=['format-suffix'])
rebulk.functional(find_languages, properties={'language': [None]})
rebulk.rules(SubtitlePrefixLanguageRule, SubtitleSuffixLanguageRule, SubtitleExtensionRule)
validator=seps_surround, tags=['source-suffix'],
disabled=lambda context: is_disabled(context, 'language'))

def find_languages(string, context=None):
"""Find languages in the string

:return: list of tuple (property, Language, lang_word, word)
"""
return LanguageFinder(context, subtitle_prefixes, subtitle_suffixes,
lang_prefixes, lang_suffixes, weak_affixes).find(string)

rebulk.functional(find_languages,
properties={'language': [None]},
disabled=lambda context: not context.get('allowed_languages'))
rebulk.rules(SubtitleExtensionRule,
SubtitlePrefixLanguageRule,
SubtitleSuffixLanguageRule,
RemoveLanguage,
RemoveInvalidLanguages(common_words))

babelfish.language_converters['guessit'] = GuessitConverter(config['synonyms'])

return rebulk


COMMON_WORDS_STRICT = frozenset(['brazil'])

UNDETERMINED = babelfish.Language('und')

SYN = {('ell', None): ['gr', 'greek'],
('spa', None): ['esp', 'español', 'espanol'],
('fra', None): ['français', 'vf', 'vff', 'vfi', 'vfq'],
('swe', None): ['se'],
('por', 'BR'): ['po', 'pb', 'pob', 'ptbr', 'br', 'brazilian'],
('cat', None): ['català', 'castellano', 'espanol castellano', 'español castellano'],
('ces', None): ['cz'],
('ukr', None): ['ua'],
('zho', None): ['cn'],
('jpn', None): ['jp'],
('hrv', None): ['scr'],
('mul', None): ['multi', 'dl']}  # http://scenelingo.wordpress.com/2009/03/24/what-does-dl-mean/
MULTIPLE = babelfish.Language('mul')
NON_SPECIFIC_LANGUAGES = frozenset([UNDETERMINED, MULTIPLE])


class GuessitConverter(babelfish.LanguageReverseConverter):  # pylint: disable=missing-docstring
_with_country_regexp = re.compile(r'(.*)\((.*)\)')
_with_country_regexp2 = re.compile(r'(.*)-(.*)')

def __init__(self):
def __init__(self, synonyms):
self.guessit_exceptions = {}
for (alpha3, country), synlist in SYN.items():
for code, synlist in synonyms.items():
if '_' in code:
(alpha3, country) = code.split('_')
else:
(alpha3, country) = (code, None)
for syn in synlist:
self.guessit_exceptions[syn.lower()] = (alpha3, country, None)
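The converter now receives its synonyms from configuration instead of the removed module-level SYN dict. A sketch of the expected mapping shape, derived from the parsing loop above (keys are alpha3 codes, optionally suffixed with '_COUNTRY'):

    synonyms = {
        'ell': ['gr', 'greek'],         # plain alpha3 key
        'por_BR': ['pb', 'pob', 'br'],  # alpha3 plus country, split on '_'
    }
    converter = GuessitConverter(synonyms)
    # converter.guessit_exceptions['pb'] == ('por', 'BR', None)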

@@ -76,15 +103,7 @@ class GuessitConverter(babelfish.LanguageReverseConverter):  # pylint: disable=m

return str(babelfish.Language(alpha3, country, script))

def reverse(self, name):  # pylint:disable=arguments-differ
with_country = (GuessitConverter._with_country_regexp.match(name) or
GuessitConverter._with_country_regexp2.match(name))

name = name.lower()
if with_country:
lang = babelfish.Language.fromguessit(with_country.group(1).strip())
lang.country = babelfish.Country.fromguessit(with_country.group(2).strip())
return lang.alpha3, lang.country.alpha2 if lang.country else None, lang.script or None

# exceptions come first, as they need to override a potential match
# with any of the other guessers
try:

@@ -96,7 +115,8 @@ class GuessitConverter(babelfish.LanguageReverseConverter):  # pylint: disable=m

babelfish.Language.fromalpha3b,
babelfish.Language.fromalpha2,
babelfish.Language.fromname,
babelfish.Language.fromopensubtitles]:
babelfish.Language.fromopensubtitles,
babelfish.Language.fromietf]:
try:
reverse = conv(name)
return reverse.alpha3, reverse.country, reverse.script

@@ -113,24 +133,6 @@ def length_comparator(value):

return len(value)


babelfish.language_converters['guessit'] = GuessitConverter()


subtitle_both = ['sub', 'subs', 'subbed', 'custom subbed', 'custom subs',
'custom sub', 'customsubbed', 'customsubs', 'customsub',
'soft subtitles', 'soft subs']
subtitle_prefixes = sorted(subtitle_both +
['st', 'vost', 'subforced', 'fansub', 'hardsub',
'legenda', 'legendas', 'legendado', 'subtitulado',
'soft', 'subtitles'], key=length_comparator)
subtitle_suffixes = sorted(subtitle_both +
['subforced', 'fansub', 'hardsub'], key=length_comparator)
lang_both = ['dublado', 'dubbed', 'dub']
lang_suffixes = sorted(lang_both + ['audio'], key=length_comparator)
lang_prefixes = sorted(lang_both + ['true'], key=length_comparator)

weak_prefixes = ('audio', 'true')

_LanguageMatch = namedtuple('_LanguageMatch', ['property_name', 'word', 'lang'])


@@ -149,7 +151,7 @@ class LanguageWord(object):

self.next_word = next_word

@property
def extended_word(self):
def extended_word(self):  # pylint:disable=inconsistent-return-statements
"""
Return the extended word for this instance, if any.
"""

@@ -175,10 +177,17 @@ def to_rebulk_match(language_match):

end = word.end
name = language_match.property_name
if language_match.lang == UNDETERMINED:
return start, end, dict(name=name, value=word.value.lower(),
formatter=babelfish.Language, tags=['weak-language'])
return start, end, {
'name': name,
'value': word.value.lower(),
'formatter': babelfish.Language,
'tags': ['weak-language']
}

return start, end, dict(name=name, value=language_match.lang)
return start, end, {
'name': name,
'value': language_match.lang
}


class LanguageFinder(object):

@@ -186,10 +195,21 @@ class LanguageFinder(object):

Helper class to search and return language matches: 'language' and 'subtitle_language' properties
"""

def __init__(self, allowed_languages):
self.parsed = dict()
self.allowed_languages = allowed_languages
self.common_words = COMMON_WORDS_STRICT if allowed_languages else COMMON_WORDS
def __init__(self, context,
subtitle_prefixes, subtitle_suffixes,
lang_prefixes, lang_suffixes, weak_affixes):
allowed_languages = context.get('allowed_languages') if context else None
self.allowed_languages = {l.lower() for l in allowed_languages or []}
self.weak_affixes = weak_affixes
self.prefixes_map = {}
self.suffixes_map = {}

if not is_disabled(context, 'subtitle_language'):
self.prefixes_map['subtitle_language'] = subtitle_prefixes
self.suffixes_map['subtitle_language'] = subtitle_suffixes

self.prefixes_map['language'] = lang_prefixes
self.suffixes_map['language'] = lang_suffixes

def find(self, string):
"""

@@ -250,11 +270,11 @@ class LanguageFinder(object):

"""
tuples = [
(language_word, language_word.next_word,
dict(subtitle_language=subtitle_prefixes, language=lang_prefixes),
self.prefixes_map,
lambda string, prefix: string.startswith(prefix),
lambda string, prefix: string[len(prefix):]),
(language_word.next_word, language_word,
dict(subtitle_language=subtitle_suffixes, language=lang_suffixes),
self.suffixes_map,
lambda string, suffix: string.endswith(suffix),
lambda string, suffix: string[:len(string) - len(suffix)])
]

@@ -271,7 +291,7 @@ class LanguageFinder(object):

if match:
yield match

def find_match_for_word(self, word, fallback_word, affixes, is_affix, strip_affix):
def find_match_for_word(self, word, fallback_word, affixes, is_affix, strip_affix):  # pylint:disable=inconsistent-return-statements
"""
Return the language match for the given word and affixes.
"""

@@ -280,8 +300,6 @@ class LanguageFinder(object):

continue

word_lang = current_word.value.lower()
if word_lang in self.common_words:
continue

for key, parts in affixes.items():
for part in parts:

@@ -291,30 +309,31 @@ class LanguageFinder(object):

match = None
value = strip_affix(word_lang, part)
if not value:
if fallback_word:
match = self.find_language_match_for_word(fallback_word, key=key, force=True)
if fallback_word and (
abs(fallback_word.start - word.end) <= 1 or abs(word.start - fallback_word.end) <= 1):
match = self.find_language_match_for_word(fallback_word, key=key)

if not match and part not in weak_prefixes:
if not match and part not in self.weak_affixes:
match = self.create_language_match(key, LanguageWord(current_word.start, current_word.end,
'und', current_word.input_string))
elif value not in self.common_words:
else:
match = self.create_language_match(key, LanguageWord(current_word.start, current_word.end,
value, current_word.input_string))

if match:
return match

def find_language_match_for_word(self, word, key='language', force=False):
def find_language_match_for_word(self, word, key='language'):  # pylint:disable=inconsistent-return-statements
"""
Return the language match for the given word.
"""
for current_word in (word.extended_word, word):
if current_word and (force or current_word.value.lower() not in self.common_words):
if current_word:
match = self.create_language_match(key, current_word)
if match:
return match

def create_language_match(self, key, word):
def create_language_match(self, key, word):  # pylint:disable=inconsistent-return-statements
"""
Create a LanguageMatch for a given word
"""

@@ -323,40 +342,21 @@ class LanguageFinder(object):

if lang is not None:
return _LanguageMatch(property_name=key, word=word, lang=lang)

def parse_language(self, lang_word):
def parse_language(self, lang_word):  # pylint:disable=inconsistent-return-statements
"""
Parse the lang_word into a valid Language.

Multi and Undetermined languages are also valid languages.
"""
if lang_word in self.parsed:
return self.parsed[lang_word]

try:
lang = babelfish.Language.fromguessit(lang_word)
if self.allowed_languages:
if (hasattr(lang, 'name') and lang.name.lower() in self.allowed_languages) \
or (hasattr(lang, 'alpha2') and lang.alpha2.lower() in self.allowed_languages) \
or lang.alpha3.lower() in self.allowed_languages:
self.parsed[lang_word] = lang
return lang
# Keep language with alpha2 equivalent. Others are probably
# uncommon languages.
elif lang in ('mul', UNDETERMINED) or hasattr(lang, 'alpha2'):
self.parsed[lang_word] = lang
if ((hasattr(lang, 'name') and lang.name.lower() in self.allowed_languages) or
(hasattr(lang, 'alpha2') and lang.alpha2.lower() in self.allowed_languages) or
lang.alpha3.lower() in self.allowed_languages):
return lang

self.parsed[lang_word] = None
except babelfish.Error:
self.parsed[lang_word] = None


def find_languages(string, context=None):
"""Find languages in the string

:return: list of tuple (property, Language, lang_word, word)
"""
return LanguageFinder(context.get('allowed_languages')).find(string)
pass


class SubtitlePrefixLanguageRule(Rule):

@@ -367,6 +367,9 @@ class SubtitlePrefixLanguageRule(Rule):

properties = {'subtitle_language': [None]}

def enabled(self, context):
return not is_disabled(context, 'subtitle_language')

def when(self, matches, context):
to_rename = []
to_remove = matches.named('subtitle_language.prefix')

@@ -387,7 +390,9 @@ class SubtitlePrefixLanguageRule(Rule):

to_remove.extend(matches.conflicting(lang))
if prefix in to_remove:
to_remove.remove(prefix)
return to_rename, to_remove
if to_rename or to_remove:
return to_rename, to_remove
return False

def then(self, matches, when_response, context):
to_rename, to_remove = when_response

@@ -412,6 +417,9 @@ class SubtitleSuffixLanguageRule(Rule):

properties = {'subtitle_language': [None]}

def enabled(self, context):
return not is_disabled(context, 'subtitle_language')

def when(self, matches, context):
to_append = []
to_remove = matches.named('subtitle_language.suffix')

@@ -421,7 +429,9 @@ class SubtitleSuffixLanguageRule(Rule):

to_append.append(lang)
if suffix in to_remove:
to_remove.remove(suffix)
return to_append, to_remove
if to_append or to_remove:
return to_append, to_remove
return False

def then(self, matches, when_response, context):
to_rename, to_remove = when_response

@@ -436,17 +446,65 @@ class SubtitleExtensionRule(Rule):

"""
Convert language guess as subtitle_language if next match is a subtitle extension.

Since it's a strong match, it also removes any conflicting format with it.
Since it's a strong match, it also removes any conflicting source with it.
"""
consequence = [RemoveMatch, RenameMatch('subtitle_language')]

properties = {'subtitle_language': [None]}

def when(self, matches, context):
def enabled(self, context):
return not is_disabled(context, 'subtitle_language')

def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
subtitle_extension = matches.named('container',
lambda match: 'extension' in match.tags and 'subtitle' in match.tags,
0)
if subtitle_extension:
subtitle_lang = matches.previous(subtitle_extension, lambda match: match.name == 'language', 0)
if subtitle_lang:
return matches.conflicting(subtitle_lang, lambda m: m.name == 'format'), subtitle_lang
for weak in matches.named('subtitle_language', predicate=lambda m: 'weak-language' in m.tags):
weak.private = True

return matches.conflicting(subtitle_lang, lambda m: m.name == 'source'), subtitle_lang
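A minimal sketch of the conversion this rule performs (output shape assumed, not verified against this build):

    from guessit import guessit

    # a language guess immediately before a subtitle extension is renamed
    # to subtitle_language, and any conflicting source match is removed
    guess = guessit('Movie.Title.2018.en.srt')
    # expected: the 'en' match is reported as subtitle_language, not language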


class RemoveLanguage(Rule):
"""Remove language matches that were not converted to subtitle_language when language is disabled."""

consequence = RemoveMatch

def enabled(self, context):
return is_disabled(context, 'language')

def when(self, matches, context):
return matches.named('language')


class RemoveInvalidLanguages(Rule):
"""Remove language matches that match the blacklisted common words."""

consequence = RemoveMatch
priority = 32

def __init__(self, common_words):
"""Constructor."""
super(RemoveInvalidLanguages, self).__init__()
self.common_words = common_words

def when(self, matches, context):
to_remove = []
for match in matches.range(0, len(matches.input_string),
predicate=lambda m: m.name in ('language', 'subtitle_language')):
if match.raw.lower() not in self.common_words:
continue

group = matches.markers.at_match(match, index=0, predicate=lambda m: m.name == 'group')
if group and (
not matches.range(
group.start, group.end, predicate=lambda m: m.name not in ('language', 'subtitle_language')
) and (not matches.holes(group.start, group.end, predicate=lambda m: m.value.strip(seps)))):
continue

to_remove.append(match)

return to_remove
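A sketch of what the common_words blacklist is for (its contents come from configuration and are assumed here):

    from guessit import guessit

    # assuming 'it' is in the configured common_words, the bare word 'It'
    # stays part of the title instead of resolving to language=Italian
    guess = guessit('How.It.Ends.2018.1080p.mkv')
    # expected: 'language' not in guess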


@@ -8,16 +8,23 @@ import mimetypes

from rebulk import Rebulk, CustomRule, POST_PROCESS
from rebulk.match import Match

from ..common.pattern import is_disabled
from ...rules.processors import Processors


def mimetype():
def mimetype(config):  # pylint:disable=unused-argument
"""
Builder for rebulk object.

:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
return Rebulk().rules(Mimetype)
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'mimetype'))
rebulk.rules(Mimetype)

return rebulk
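For reference, the rule only post-processes the container through the standard mimetypes module; a minimal sketch (the exact value depends on the platform's MIME table):

    from guessit import guessit

    guess = guessit('Movie.Title.2018.mp4')
    # expected: guess['mimetype'] == 'video/mp4', via mimetypes.guess_type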


class Mimetype(CustomRule):


@@ -5,38 +5,55 @@ other property

"""
import copy

from rebulk import Rebulk, Rule, RemoveMatch, POST_PROCESS, AppendMatch
from rebulk import Rebulk, Rule, RemoveMatch, RenameMatch, POST_PROCESS, AppendMatch
from rebulk.remodule import re

from ..common import dash
from ..common import seps
from ..common.validators import seps_after, seps_before, seps_surround, compose
from ..common.pattern import is_disabled
from ..common.validators import seps_after, seps_before, seps_surround, and_
from ...reutils import build_or_pattern
from ...rules.common.formatters import raw_cleanup


def other():
def other(config):  # pylint:disable=unused-argument,too-many-statements
"""
Builder for rebulk object.

:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'other'))
rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
rebulk.defaults(name="other", validator=seps_surround)

rebulk.regex('Audio-?Fix', 'Audio-?Fixed', value='AudioFix')
rebulk.regex('Sync-?Fix', 'Sync-?Fixed', value='SyncFix')
rebulk.regex('Dual', 'Dual-?Audio', value='DualAudio')
rebulk.regex('ws', 'wide-?screen', value='WideScreen')
rebulk.regex('Re-?Enc(?:oded)?', value='ReEncoded')
rebulk.regex('Audio-?Fix', 'Audio-?Fixed', value='Audio Fixed')
rebulk.regex('Sync-?Fix', 'Sync-?Fixed', value='Sync Fixed')
rebulk.regex('Dual', 'Dual-?Audio', value='Dual Audio')
rebulk.regex('ws', 'wide-?screen', value='Widescreen')
rebulk.regex('Re-?Enc(?:oded)?', value='Reencoded')

rebulk.string('Real', 'Fix', 'Fixed', value='Proper', tags=['has-neighbor-before', 'has-neighbor-after'])
rebulk.string('Proper', 'Repack', 'Rerip', 'Dirfix', 'Nfofix', 'Prooffix', value='Proper',
rebulk.string('Repack', 'Rerip', value='Proper',
tags=['streaming_service.prefix', 'streaming_service.suffix'])
rebulk.regex('(?:Proof-?)?Sample-?Fix', value='Proper',
rebulk.string('Proper', value='Proper',
tags=['has-neighbor', 'streaming_service.prefix', 'streaming_service.suffix'])

rebulk.regex('Real-Proper', 'Real-Repack', 'Real-Rerip', value='Proper',
tags=['streaming_service.prefix', 'streaming_service.suffix', 'real'])
rebulk.regex('Real', value='Proper',
tags=['has-neighbor', 'streaming_service.prefix', 'streaming_service.suffix', 'real'])

rebulk.string('Fix', 'Fixed', value='Fix', tags=['has-neighbor-before', 'has-neighbor-after',
'streaming_service.prefix', 'streaming_service.suffix'])
rebulk.string('Dirfix', 'Nfofix', 'Prooffix', value='Fix',
tags=['streaming_service.prefix', 'streaming_service.suffix'])
rebulk.regex('(?:Proof-?)?Sample-?Fix', value='Fix',
tags=['streaming_service.prefix', 'streaming_service.suffix'])
rebulk.string('Fansub', value='Fansub', tags='has-neighbor')
rebulk.string('Fastsub', value='Fastsub', tags='has-neighbor')

rebulk.string('Fansub', value='Fan Subtitled', tags='has-neighbor')
rebulk.string('Fastsub', value='Fast Subtitled', tags='has-neighbor')

season_words = build_or_pattern(["seasons?", "series?"])
complete_articles = build_or_pattern(["The"])

@@ -60,30 +77,42 @@ def other():

private_names=['completeArticle', 'completeWordsBefore', 'completeWordsAfter'],
value={'other': 'Complete'},
tags=['release-group-prefix'],
validator={'__parent__': compose(seps_surround, validate_complete)})
rebulk.string('R5', 'RC', value='R5')
validator={'__parent__': and_(seps_surround, validate_complete)})
rebulk.string('R5', value='Region 5')
rebulk.string('RC', value='Region C')
rebulk.regex('Pre-?Air', value='Preair')
rebulk.regex('(?:PS-?)?Vita', value='PS Vita')
rebulk.regex('(?:PS-?)Vita', value='PS Vita')
rebulk.regex('Vita', value='PS Vita', tags='has-neighbor')
rebulk.regex('(HD)(?P<another>Rip)', value={'other': 'HD', 'another': 'Rip'},
private_parent=True, children=True, validator={'__parent__': seps_surround}, validate_all=True)

for value in (
'Screener', 'Remux', '3D', 'mHD', 'HDLight', 'HQ', 'DDC', 'HR', 'PAL', 'SECAM', 'NTSC',
'CC', 'LD', 'MD', 'XXX'):
for value in ('Screener', 'Remux', 'PAL', 'SECAM', 'NTSC', 'XXX'):
rebulk.string(value, value=value)
rebulk.string('3D', value='3D', tags='has-neighbor')

rebulk.string('LDTV', value='LD')
rebulk.string('HQ', value='High Quality', tags='uhdbluray-neighbor')
rebulk.string('HR', value='High Resolution')
rebulk.string('LD', value='Line Dubbed')
rebulk.string('MD', value='Mic Dubbed')
rebulk.string('mHD', 'HDLight', value='Micro HD')
rebulk.string('LDTV', value='Low Definition')
rebulk.string('HFR', value='High Frame Rate')
rebulk.string('VFR', value='Variable Frame Rate')
rebulk.string('HD', value='HD', validator=None,
tags=['streaming_service.prefix', 'streaming_service.suffix'])
rebulk.regex('Full-?HD', 'FHD', value='FullHD', validator=None,
rebulk.regex('Full-?HD', 'FHD', value='Full HD', validator=None,
tags=['streaming_service.prefix', 'streaming_service.suffix'])
rebulk.regex('Ultra-?(?:HD)?', 'UHD', value='UltraHD', validator=None,
rebulk.regex('Ultra-?(?:HD)?', 'UHD', value='Ultra HD', validator=None,
tags=['streaming_service.prefix', 'streaming_service.suffix'])
rebulk.regex('Upscaled?', value='Upscaled')

for value in ('Complete', 'Classic', 'LiNE', 'Bonus', 'Trailer', 'FINAL', 'Retail',
for value in ('Complete', 'Classic', 'Bonus', 'Trailer', 'Retail',
'Colorized', 'Internal'):
rebulk.string(value, value=value, tags=['has-neighbor', 'release-group-prefix'])
rebulk.regex('LiNE', value='Line Audio', tags=['has-neighbor-before', 'has-neighbor-after', 'release-group-prefix'])
rebulk.regex('Read-?NFO', value='Read NFO')
rebulk.string('CONVERT', value='Converted', tags='has-neighbor')
rebulk.string('DOCU', value='Documentary', tags='has-neighbor')
rebulk.string('DOCU', 'DOKU', value='Documentary', tags='has-neighbor')
rebulk.string('OM', value='Open Matte', tags='has-neighbor')
rebulk.string('STV', value='Straight to Video', tags='has-neighbor')
rebulk.string('OAR', value='Original Aspect Ratio', tags='has-neighbor')

@@ -92,16 +121,30 @@ def other():

for coast in ('East', 'West'):
rebulk.regex(r'(?:Live-)?(?:Episode-)?' + coast + '-?(?:Coast-)?Feed', value=coast + ' Coast Feed')

rebulk.string('VO', 'OV', value='OV', tags='has-neighbor')
rebulk.string('VO', 'OV', value='Original Video', tags='has-neighbor')
rebulk.string('Ova', 'Oav', value='Original Animated Video')

rebulk.regex('Scr(?:eener)?', value='Screener', validator=None,
tags=['other.validate.screener', 'format-prefix', 'format-suffix'])
tags=['other.validate.screener', 'source-prefix', 'source-suffix'])
rebulk.string('Mux', value='Mux', validator=seps_after,
tags=['other.validate.mux', 'video-codec-prefix', 'format-suffix'])
rebulk.string('HC', value='Hardcoded Subtitles')
tags=['other.validate.mux', 'video-codec-prefix', 'source-suffix'])
rebulk.string('HC', 'vost', value='Hardcoded Subtitles')

rebulk.rules(ValidateHasNeighbor, ValidateHasNeighborAfter, ValidateHasNeighborBefore, ValidateScreenerRule,
ValidateMuxRule, ValidateHardcodedSubs, ValidateStreamingServiceNeighbor, ProperCountRule)
rebulk.string('SDR', value='Standard Dynamic Range', tags='uhdbluray-neighbor')
rebulk.regex('HDR(?:10)?', value='HDR10', tags='uhdbluray-neighbor')
rebulk.regex('Dolby-?Vision', value='Dolby Vision', tags='uhdbluray-neighbor')
rebulk.regex('BT-?2020', value='BT.2020', tags='uhdbluray-neighbor')

rebulk.string('Sample', value='Sample', tags=['at-end', 'not-a-release-group'])
rebulk.string('Extras', value='Extras', tags='has-neighbor')
rebulk.regex('Digital-?Extras?', value='Extras')
rebulk.string('Proof', value='Proof', tags=['at-end', 'not-a-release-group'])
rebulk.string('Obfuscated', 'Scrambled', value='Obfuscated', tags=['at-end', 'not-a-release-group'])
rebulk.string('xpost', 'postbot', 'asrequested', value='Repost', tags='not-a-release-group')

rebulk.rules(RenameAnotherToOther, ValidateHasNeighbor, ValidateHasNeighborAfter, ValidateHasNeighborBefore,
ValidateScreenerRule, ValidateMuxRule, ValidateHardcodedSubs, ValidateStreamingServiceNeighbor,
ValidateAtEnd, ValidateReal, ProperCountRule)

return rebulk

@@ -116,7 +159,7 @@ class ProperCountRule(Rule):

properties = {'proper_count': [None]}

def when(self, matches, context):
def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
propers = matches.named('other', lambda match: match.value == 'Proper')
if propers:
raws = {}  # Count distinct raw values

@@ -124,15 +167,32 @@ class ProperCountRule(Rule):

raws[raw_cleanup(proper.raw)] = proper
proper_count_match = copy.copy(propers[-1])
proper_count_match.name = 'proper_count'
proper_count_match.value = len(raws)

value = 0
for raw in raws.values():
value += 2 if 'real' in raw.tags else 1

proper_count_match.value = value
return proper_count_match
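The new counting weights REAL double: each distinct cleaned raw tagged 'real' adds 2 to proper_count, any other distinct raw adds 1. A rough sketch (output assumed, not verified against this build):

    from guessit import guessit

    # 'REAL.PROPER' matches the Real-Proper pattern above, tagged 'real'
    guess = guessit('Movie.Title.2018.REAL.PROPER.1080p.WEB-DL.x264.mkv')
    # expected: guess['proper_count'] == 2, versus 1 for a plain '.PROPER.'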


class RenameAnotherToOther(Rule):
"""
Rename `another` properties to `other`
"""
priority = 32
consequence = RenameMatch('other')

def when(self, matches, context):
return matches.named('another')


class ValidateHasNeighbor(Rule):
"""
Validate tag has-neighbor
"""
consequence = RemoveMatch
priority = 64

def when(self, matches, context):
ret = []

@@ -158,6 +218,7 @@ class ValidateHasNeighborBefore(Rule):

Validate tag has-neighbor-before that previous match exists.
"""
consequence = RemoveMatch
priority = 64

def when(self, matches, context):
ret = []

@@ -177,6 +238,7 @@ class ValidateHasNeighborAfter(Rule):

Validate tag has-neighbor-after that next match exists.
"""
consequence = RemoveMatch
priority = 64

def when(self, matches, context):
ret = []

@@ -201,8 +263,8 @@ class ValidateScreenerRule(Rule):

def when(self, matches, context):
ret = []
for screener in matches.named('other', lambda match: 'other.validate.screener' in match.tags):
format_match = matches.previous(screener, lambda match: match.name == 'format', 0)
if not format_match or matches.input_string[format_match.end:screener.start].strip(seps):
source_match = matches.previous(screener, lambda match: match.initiator.name == 'source', 0)
if not source_match or matches.input_string[source_match.end:screener.start].strip(seps):
ret.append(screener)
return ret

@@ -217,8 +279,8 @@ class ValidateMuxRule(Rule):

def when(self, matches, context):
ret = []
for mux in matches.named('other', lambda match: 'other.validate.mux' in match.tags):
format_match = matches.previous(mux, lambda match: match.name == 'format', 0)
if not format_match:
source_match = matches.previous(mux, lambda match: match.initiator.name == 'source', 0)
if not source_match:
ret.append(mux)
return ret

@@ -257,16 +319,18 @@ class ValidateStreamingServiceNeighbor(Rule):

def when(self, matches, context):
to_remove = []
for match in matches.named('other',
predicate=lambda m: ('streaming_service.prefix' in m.tags or
'streaming_service.suffix' in m.tags)):

predicate=lambda m: (m.initiator.name != 'source'
and ('streaming_service.prefix' in m.tags
or 'streaming_service.suffix' in m.tags))):
match = match.initiator
if not seps_after(match):
if 'streaming_service.prefix' in match.tags:
next_match = matches.next(match, lambda m: m.name == 'streaming_service', 0)
if next_match and not matches.holes(match.end, next_match.start,
predicate=lambda m: m.value.strip(seps)):
continue

if match.children:
to_remove.extend(match.children)
to_remove.append(match)

elif not seps_before(match):

@@ -276,6 +340,44 @@ class ValidateStreamingServiceNeighbor(Rule):

predicate=lambda m: m.value.strip(seps)):
continue

if match.children:
to_remove.extend(match.children)
to_remove.append(match)

return to_remove


class ValidateAtEnd(Rule):
"""Validate other which should occur at the end of a filepart."""

priority = 32
consequence = RemoveMatch

def when(self, matches, context):
to_remove = []
for filepart in matches.markers.named('path'):
for match in matches.range(filepart.start, filepart.end,
predicate=lambda m: m.name == 'other' and 'at-end' in m.tags):
if (matches.holes(match.end, filepart.end, predicate=lambda m: m.value.strip(seps)) or
matches.range(match.end, filepart.end, predicate=lambda m: m.name not in (
'other', 'container'))):
to_remove.append(match)

return to_remove


class ValidateReal(Rule):
"""
Validate Real
"""
consequence = RemoveMatch
priority = 64

def when(self, matches, context):
ret = []
for filepart in matches.markers.named('path'):
for match in matches.range(filepart.start, filepart.end, lambda m: m.name == 'other' and 'real' in m.tags):
if not matches.range(filepart.start, match.start):
ret.append(match)

return ret


@@ -7,20 +7,25 @@ from rebulk.remodule import re

from rebulk import Rebulk
from ..common import dash
from ..common.validators import seps_surround, int_coercable, compose
from ..common.pattern import is_disabled
from ..common.validators import seps_surround, int_coercable, and_
from ..common.numeral import numeral, parse_numeral
from ...reutils import build_or_pattern


def part():
def part(config):  # pylint:disable=unused-argument
"""
Builder for rebulk object.

:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash], validator={'__parent__': seps_surround})
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'part'))
rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash], validator={'__parent__': seps_surround})

prefixes = ['pt', 'part']
prefixes = config['prefixes']

def validate_roman(match):
"""

@@ -36,6 +41,6 @@ def part():

rebulk.regex(build_or_pattern(prefixes) + r'-?(?P<part>' + numeral + r')',
prefixes=prefixes, validate_all=True, private_parent=True, children=True, formatter=parse_numeral,
validator={'part': compose(validate_roman, lambda m: 0 < m.value < 100)})
validator={'part': and_(validate_roman, lambda m: 0 < m.value < 100)})

return rebulk
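With the former defaults ('pt', 'part') now supplied through config['prefixes'], behaviour should stay as before; a minimal sketch (output assumed):

    from guessit import guessit

    # 'Part.2' matches build_or_pattern(prefixes) + numeral, parsed by parse_numeral
    guess = guessit('Movie.Title.Part.2.2018.mkv')
    # expected: guess['part'] == 2; roman numerals such as 'Part.II' are accepted too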
|
||||
|
|
|
@ -6,22 +6,53 @@ release_group property
|
|||
import copy
|
||||
|
||||
from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch
|
||||
from rebulk.match import Match
|
||||
|
||||
from ..common import seps
|
||||
from ..common.expected import build_expected_function
|
||||
from ..common.comparators import marker_sorted
|
||||
from ..common.expected import build_expected_function
|
||||
from ..common.formatters import cleanup
|
||||
from ..common.pattern import is_disabled
|
||||
from ..common.validators import int_coercable, seps_surround
|
||||
from ..properties.title import TitleFromPosition
|
||||
|
||||
|
||||
def release_group():
|
||||
def release_group(config):
|
||||
"""
|
||||
Builder for rebulk object.
|
||||
|
||||
:param config: rule configuration
|
||||
:type config: dict
|
||||
:return: Created Rebulk object
|
||||
:rtype: Rebulk
|
||||
"""
|
||||
rebulk = Rebulk()
|
||||
forbidden_groupnames = config['forbidden_names']
|
||||
|
||||
groupname_ignore_seps = config['ignored_seps']
|
||||
groupname_seps = ''.join([c for c in seps if c not in groupname_ignore_seps])
|
||||
|
||||
def clean_groupname(string):
|
||||
"""
|
||||
Removes and strip separators from input_string
|
||||
:param string:
|
||||
:type string:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
string = string.strip(groupname_seps)
|
||||
if not (string.endswith(tuple(groupname_ignore_seps)) and string.startswith(tuple(groupname_ignore_seps))) \
|
||||
and not any(i in string.strip(groupname_ignore_seps) for i in groupname_ignore_seps):
|
||||
string = string.strip(groupname_ignore_seps)
|
||||
for forbidden in forbidden_groupnames:
|
||||
if string.lower().startswith(forbidden) and string[len(forbidden):len(forbidden) + 1] in seps:
|
||||
string = string[len(forbidden):]
|
||||
string = string.strip(groupname_seps)
|
||||
if string.lower().endswith(forbidden) and string[-len(forbidden) - 1:-len(forbidden)] in seps:
|
||||
string = string[:len(forbidden)]
|
||||
string = string.strip(groupname_seps)
|
||||
return string.strip()
|
||||
|
||||
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'release_group'))
|
||||
|
||||
expected_group = build_expected_function('expected_group')
|
||||
|
||||
|
@ -30,42 +61,142 @@ def release_group():
|
|||
conflict_solver=lambda match, other: other,
|
||||
disabled=lambda context: not context.get('expected_group'))
|
||||
|
||||
return rebulk.rules(SceneReleaseGroup, AnimeReleaseGroup)
|
||||
return rebulk.rules(
|
||||
DashSeparatedReleaseGroup(clean_groupname),
|
||||
SceneReleaseGroup(clean_groupname),
|
||||
AnimeReleaseGroup
|
||||
)
|
||||
|
||||
|
||||
forbidden_groupnames = ['rip', 'by', 'for', 'par', 'pour', 'bonus']
|
||||
|
||||
groupname_ignore_seps = '[]{}()'
|
||||
groupname_seps = ''.join([c for c in seps if c not in groupname_ignore_seps])
|
||||
|
||||
|
||||
def clean_groupname(string):
|
||||
"""
|
||||
Removes and strip separators from input_string
|
||||
    :param string:
    :type string:
    :return:
    :rtype:
    """
    string = string.strip(groupname_seps)
    if not (string.endswith(tuple(groupname_ignore_seps)) and string.startswith(tuple(groupname_ignore_seps))) \
            and not any(i in string.strip(groupname_ignore_seps) for i in groupname_ignore_seps):
        string = string.strip(groupname_ignore_seps)
    for forbidden in forbidden_groupnames:
        if string.lower().startswith(forbidden) and string[len(forbidden):len(forbidden)+1] in seps:
            string = string[len(forbidden):]
            string = string.strip(groupname_seps)
        if string.lower().endswith(forbidden) and string[-len(forbidden)-1:-len(forbidden)] in seps:
            string = string[:len(forbidden)]
            string = string.strip(groupname_seps)
    return string


_scene_previous_names = ['video_codec', 'format', 'video_api', 'audio_codec', 'audio_profile', 'video_profile',
_scene_previous_names = ('video_codec', 'source', 'video_api', 'audio_codec', 'audio_profile', 'video_profile',
                         'audio_channels', 'screen_size', 'other', 'container', 'language', 'subtitle_language',
                         'subtitle_language.suffix', 'subtitle_language.prefix', 'language.suffix']
                         'subtitle_language.suffix', 'subtitle_language.prefix', 'language.suffix')

_scene_previous_tags = ['release-group-prefix']
_scene_previous_tags = ('release-group-prefix',)

_scene_no_previous_tags = ('no-release-group-prefix',)


class DashSeparatedReleaseGroup(Rule):
    """
    Detect dash separated release groups that might appear at the end or at the beginning of a release name.

    Series.S01E02.Pilot.DVDRip.x264-CS.mkv
        release_group: CS
    abc-the.title.name.1983.1080p.bluray.x264.mkv
        release_group: abc

    At the end: Release groups should be dash-separated and shouldn't contain spaces nor
    appear in a group with other matches. The preceding matches should be separated by dot.
    If a release group is found, the conflicting matches are removed.

    At the beginning: Release groups should be dash-separated and shouldn't contain spaces nor appear in a group.
    It should be followed by a hole with dot-separated words.
    Detection only happens if no matches exist at the beginning.
    """
    consequence = [RemoveMatch, AppendMatch]

    def __init__(self, value_formatter):
        """Default constructor."""
        super(DashSeparatedReleaseGroup, self).__init__()
        self.value_formatter = value_formatter

    @classmethod
    def is_valid(cls, matches, candidate, start, end, at_end):  # pylint:disable=inconsistent-return-statements
        """
        Whether a candidate is a valid release group.
        """
        if not at_end:
            if len(candidate.value) <= 1:
                return False

            if matches.markers.at_match(candidate, predicate=lambda m: m.name == 'group'):
                return False

            first_hole = matches.holes(candidate.end, end, predicate=lambda m: m.start == candidate.end, index=0)
            if not first_hole:
                return False

            raw_value = first_hole.raw
            return raw_value[0] == '-' and '-' not in raw_value[1:] and '.' in raw_value and ' ' not in raw_value

        group = matches.markers.at_match(candidate, predicate=lambda m: m.name == 'group', index=0)
        if group and matches.at_match(group, predicate=lambda m: not m.private and m.span != candidate.span):
            return False

        count = 0
        match = candidate
        while match:
            current = matches.range(start,
                                    match.start,
                                    index=-1,
                                    predicate=lambda m: not m.private and not 'expected' in m.tags)
            if not current:
                break

            separator = match.input_string[current.end:match.start]
            if not separator and match.raw[0] == '-':
                separator = '-'

            match = current

            if count == 0:
                if separator != '-':
                    break

                count += 1
                continue

            if separator == '.':
                return True

    def detect(self, matches, start, end, at_end):  # pylint:disable=inconsistent-return-statements
        """
        Detect release group at the end or at the beginning of a filepart.
        """
        candidate = None
        if at_end:
            container = matches.ending(end, lambda m: m.name == 'container', index=0)
            if container:
                end = container.start

            candidate = matches.ending(end, index=0, predicate=(
                lambda m: not m.private and not (
                    m.name == 'other' and 'not-a-release-group' in m.tags
                ) and '-' not in m.raw and m.raw.strip() == m.raw))

        if not candidate:
            if at_end:
                candidate = matches.holes(start, end, seps=seps, index=-1,
                                          predicate=lambda m: m.end == end and m.raw.strip(seps) and m.raw[0] == '-')
            else:
                candidate = matches.holes(start, end, seps=seps, index=0,
                                          predicate=lambda m: m.start == start and m.raw.strip(seps))

        if candidate and self.is_valid(matches, candidate, start, end, at_end):
            return candidate

    def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
        if matches.named('release_group'):
            return

        to_remove = []
        to_append = []
        for filepart in matches.markers.named('path'):
            candidate = self.detect(matches, filepart.start, filepart.end, True)
            if candidate:
                to_remove.extend(matches.at_match(candidate))
            else:
                candidate = self.detect(matches, filepart.start, filepart.end, False)

            if candidate:
                releasegroup = Match(candidate.start, candidate.end, name='release_group',
                                     formatter=self.value_formatter, input_string=candidate.input_string)

                if releasegroup.value:
                    to_append.append(releasegroup)
        if to_remove or to_append:
            return to_remove, to_append


class SceneReleaseGroup(Rule):

@@ -79,7 +210,23 @@ class SceneReleaseGroup(Rule):

    properties = {'release_group': [None]}

    def when(self, matches, context):
    def __init__(self, value_formatter):
        """Default constructor."""
        super(SceneReleaseGroup, self).__init__()
        self.value_formatter = value_formatter

    @staticmethod
    def is_previous_match(match):
        """
        Check if match can precede release_group

        :param match:
        :return:
        """
        return not match.tagged(*_scene_no_previous_tags) if match.name in _scene_previous_names else \
            match.tagged(*_scene_previous_tags)

    def when(self, matches, context):  # pylint:disable=too-many-locals
        # If a release_group is found before, ignore this kind of release_group rule.

        ret = []

@@ -87,6 +234,8 @@ class SceneReleaseGroup(Rule):
        for filepart in marker_sorted(matches.markers.named('path'), matches):
            # pylint:disable=cell-var-from-loop
            start, end = filepart.span
            if matches.named('release_group', predicate=lambda m: m.start >= start and m.end <= end):
                continue

            titles = matches.named('title', predicate=lambda m: m.start >= start and m.end <= end)

@@ -101,7 +250,7 @@ class SceneReleaseGroup(Rule):
                """
                return match in titles[1:]

            last_hole = matches.holes(start, end + 1, formatter=clean_groupname,
            last_hole = matches.holes(start, end + 1, formatter=self.value_formatter,
                                      ignore=keep_only_first_title,
                                      predicate=lambda hole: cleanup(hole.value), index=-1)

@@ -118,13 +267,12 @@ class SceneReleaseGroup(Rule):

                if match.start < filepart.start:
                    return False
                return not match.private or match.name in _scene_previous_names
                return not match.private or self.is_previous_match(match)

            previous_match = matches.previous(last_hole,
                                              previous_match_filter,
                                              index=0)
            if previous_match and (previous_match.name in _scene_previous_names or
                                   any(tag in previous_match.tags for tag in _scene_previous_tags)) and \
            if previous_match and (self.is_previous_match(previous_match)) and \
                    not matches.input_string[previous_match.end:last_hole.start].strip(seps) \
                    and not int_coercable(last_hole.value.strip(seps)):

@@ -134,7 +282,7 @@ class SceneReleaseGroup(Rule):
                # if hole is inside a group marker with same value, remove [](){} ...
                group = matches.markers.at_match(last_hole, lambda marker: marker.name == 'group', 0)
                if group:
                    group.formatter = clean_groupname
                    group.formatter = self.value_formatter
                    if group.value == last_hole.value:
                        last_hole.start = group.start + 1
                        last_hole.end = group.end - 1

@@ -165,11 +313,11 @@ class AnimeReleaseGroup(Rule):

        # If a release_group is found before, ignore this kind of release_group rule.
        if matches.named('release_group'):
            return
            return False

        if not matches.named('episode') and not matches.named('season') and matches.named('release_group'):
            # This doesn't seem to be an anime, and we already found another release_group.
            return
            return False

        for filepart in marker_sorted(matches.markers.named('path'), matches):

@@ -193,4 +341,7 @@ class AnimeReleaseGroup(Rule):
                    to_append.append(group)
                    to_remove.extend(matches.range(empty_group.start, empty_group.end,
                                                   lambda m: 'weak-language' in m.tags))
        return to_remove, to_append

        if to_remove or to_append:
            return to_remove, to_append
        return False
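The filenames quoted in the DashSeparatedReleaseGroup docstring double as a quick smoke test of the rebuilt rules through the public API. A minimal sketch, assuming guessit 3.0.1 is importable:

    from guessit import guessit

    # Dash-separated group at the end of the release name
    print(guessit('Series.S01E02.Pilot.DVDRip.x264-CS.mkv')['release_group'])  # CS

    # Dash-separated group at the beginning of the release name
    print(guessit('abc-the.title.name.1983.1080p.bluray.x264.mkv')['release_group'])  # abc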
libs/guessit/rules/properties/screen_size.py

@@ -3,67 +3,115 @@
"""
screen_size property
"""
from rebulk.match import Match
from rebulk.remodule import re

from rebulk import Rebulk, Rule, RemoveMatch
from rebulk import Rebulk, Rule, RemoveMatch, AppendMatch

from ..common.pattern import is_disabled
from ..common.quantity import FrameRate
from ..common.validators import seps_surround
from ..common import dash, seps
from ...reutils import build_or_pattern


def screen_size():
def screen_size(config):
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def conflict_solver(match, other):
        """
        Conflict solver for most screen_size.
        """
        if other.name == 'screen_size':
            if 'resolution' in other.tags:
                # Workaround to solve the conflict when a string like "720 x 432" also matches the 720p pattern
                int_value = _digits_re.findall(match.raw)[-1]
                if other.value.startswith(int_value):
                    return match
            return other
        return '__default__'
    interlaced = frozenset(config['interlaced'])
    progressive = frozenset(config['progressive'])
    frame_rates = [re.escape(rate) for rate in config['frame_rates']]
    min_ar = config['min_ar']
    max_ar = config['max_ar']

    rebulk = Rebulk().string_defaults(ignore_case=True).regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(name="screen_size", validator=seps_surround, conflict_solver=conflict_solver)
    rebulk = Rebulk()
    rebulk = rebulk.string_defaults(ignore_case=True).regex_defaults(flags=re.IGNORECASE)

    rebulk.regex(r"(?:\d{3,}(?:x|\*))?360(?:i|p?x?)", value="360p")
    rebulk.regex(r"(?:\d{3,}(?:x|\*))?368(?:i|p?x?)", value="368p")
    rebulk.regex(r"(?:\d{3,}(?:x|\*))?480(?:i|p?x?)", value="480p")
    rebulk.regex(r"(?:\d{3,}(?:x|\*))?576(?:i|p?x?)", value="576p")
    rebulk.regex(r"(?:\d{3,}(?:x|\*))?720(?:i|p?(?:50|60)?x?)", value="720p")
    rebulk.regex(r"(?:\d{3,}(?:x|\*))?720(?:p(?:50|60)?x?)", value="720p")
    rebulk.regex(r"(?:\d{3,}(?:x|\*))?720p?hd", value="720p")
    rebulk.regex(r"(?:\d{3,}(?:x|\*))?900(?:i|p?x?)", value="900p")
    rebulk.regex(r"(?:\d{3,}(?:x|\*))?1080i", value="1080i")
    rebulk.regex(r"(?:\d{3,}(?:x|\*))?1080p?x?", value="1080p")
    rebulk.regex(r"(?:\d{3,}(?:x|\*))?1080(?:p(?:50|60)?x?)", value="1080p")
    rebulk.regex(r"(?:\d{3,}(?:x|\*))?1080p?hd", value="1080p")
    rebulk.regex(r"(?:\d{3,}(?:x|\*))?2160(?:i|p?x?)", value="4K")
    rebulk.string('4k', value='4K')
    rebulk.defaults(name='screen_size', validator=seps_surround, abbreviations=[dash],
                    disabled=lambda context: is_disabled(context, 'screen_size'))

    _digits_re = re.compile(r'\d+')
    frame_rate_pattern = build_or_pattern(frame_rates, name='frame_rate')
    interlaced_pattern = build_or_pattern(interlaced, name='height')
    progressive_pattern = build_or_pattern(progressive, name='height')

    rebulk.defaults(name="screen_size", validator=seps_surround)
    rebulk.regex(r'\d{3,}-?(?:x|\*)-?\d{3,}',
                 formatter=lambda value: 'x'.join(_digits_re.findall(value)),
                 abbreviations=[dash],
                 tags=['resolution'],
    res_pattern = r'(?:(?P<width>\d{3,4})(?:x|\*))?'
    rebulk.regex(res_pattern + interlaced_pattern + r'(?P<scan_type>i)' + frame_rate_pattern + '?')
    rebulk.regex(res_pattern + progressive_pattern + r'(?P<scan_type>p)' + frame_rate_pattern + '?')
    rebulk.regex(res_pattern + progressive_pattern + r'(?P<scan_type>p)?(?:hd)')
    rebulk.regex(res_pattern + progressive_pattern + r'(?P<scan_type>p)?x?')
    rebulk.string('4k', value='2160p')
    rebulk.regex(r'(?P<width>\d{3,4})-?(?:x|\*)-?(?P<height>\d{3,4})',
                 conflict_solver=lambda match, other: '__default__' if other.name == 'screen_size' else other)

    rebulk.rules(ScreenSizeOnlyOne, RemoveScreenSizeConflicts)
    rebulk.regex(frame_rate_pattern + '(p|fps)', name='frame_rate',
                 formatter=FrameRate.fromstring, disabled=lambda context: is_disabled(context, 'frame_rate'))

    rebulk.rules(PostProcessScreenSize(progressive, min_ar, max_ar), ScreenSizeOnlyOne, ResolveScreenSizeConflicts)

    return rebulk


class PostProcessScreenSize(Rule):
    """
    Process the screen size calculating the aspect ratio if available.

    Convert to a standard notation (720p, 1080p, etc) when it's a standard resolution and
    aspect ratio is valid or not available.

    It also creates an aspect_ratio match when available.
    """
    consequence = AppendMatch

    def __init__(self, standard_heights, min_ar, max_ar):
        super(PostProcessScreenSize, self).__init__()
        self.standard_heights = standard_heights
        self.min_ar = min_ar
        self.max_ar = max_ar

    def when(self, matches, context):
        to_append = []
        for match in matches.named('screen_size'):
            if not is_disabled(context, 'frame_rate'):
                for frame_rate in match.children.named('frame_rate'):
                    frame_rate.formatter = FrameRate.fromstring
                    to_append.append(frame_rate)

            values = match.children.to_dict()
            if 'height' not in values:
                continue

            scan_type = (values.get('scan_type') or 'p').lower()
            height = values['height']
            if 'width' not in values:
                match.value = '{0}{1}'.format(height, scan_type)
                continue

            width = values['width']
            calculated_ar = float(width) / float(height)

            aspect_ratio = Match(match.start, match.end, input_string=match.input_string,
                                 name='aspect_ratio', value=round(calculated_ar, 3))

            if not is_disabled(context, 'aspect_ratio'):
                to_append.append(aspect_ratio)

            if height in self.standard_heights and self.min_ar < calculated_ar < self.max_ar:
                match.value = '{0}{1}'.format(height, scan_type)
            else:
                match.value = '{0}x{1}'.format(width, height)

        return to_append


class ScreenSizeOnlyOne(Rule):
    """
    Keep a single screen_size pet filepath part.
    Keep a single screen_size per filepath part.
    """
    consequence = RemoveMatch

@@ -72,15 +120,15 @@ class ScreenSizeOnlyOne(Rule):
        for filepart in matches.markers.named('path'):
            screensize = list(reversed(matches.range(filepart.start, filepart.end,
                                                     lambda match: match.name == 'screen_size')))
            if len(screensize) > 1:
            if len(screensize) > 1 and len(set((match.value for match in screensize))) > 1:
                to_remove.extend(screensize[1:])

        return to_remove


class RemoveScreenSizeConflicts(Rule):
class ResolveScreenSizeConflicts(Rule):
    """
    Remove season and episode matches which conflicts with screen_size match.
    Resolve screen_size conflicts with season and episode matches.
    """
    consequence = RemoveMatch

@@ -95,14 +143,21 @@ class RemoveScreenSizeConflicts(Rule):
            if not conflicts:
                continue

            has_neighbor = False
            video_profile = matches.range(screensize.end, filepart.end, lambda match: match.name == 'video_profile', 0)
            if video_profile and not matches.holes(screensize.end, video_profile.start,
                                                   predicate=lambda h: h.value and h.value.strip(seps)):
                to_remove.extend(conflicts)
                has_neighbor = True

            date = matches.previous(screensize, lambda match: match.name == 'date', 0)
            if date and not matches.holes(date.end, screensize.start,
                                          predicate=lambda h: h.value and h.value.strip(seps)):
            previous = matches.previous(screensize, index=0, predicate=(
                lambda m: m.name in ('date', 'source', 'other', 'streaming_service')))
            if previous and not matches.holes(previous.end, screensize.start,
                                              predicate=lambda h: h.value and h.value.strip(seps)):
                to_remove.extend(conflicts)
                has_neighbor = True

            if not has_neighbor:
                to_remove.append(screensize)

        return to_remove
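PostProcessScreenSize is easiest to understand from the API side: a width/height pair yields an aspect_ratio match, and the value collapses to the standard '<height><scan_type>' notation only when the ratio falls inside the configured (min_ar, max_ar) window. A rough sketch; the file names are invented, and the expected values follow the test cases added later in this commit:

    from guessit import guessit

    guess = guessit('Movie.Name.640x360.x264.mkv')
    print(guess['screen_size'])   # 360p  (640/360 = 1.778 lies within the default aspect-ratio window)
    print(guess['aspect_ratio'])  # 1.778

    # '4k' is now normalized to '2160p' instead of '4K'
    print(guessit('Movie.Name.4k.x265.mkv')['screen_size'])  # 2160p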
libs/guessit/rules/properties/size.py

@@ -7,23 +7,24 @@ import re

from rebulk import Rebulk

from ..common.validators import seps_surround
from ..common import dash
from ..common.quantity import Size
from ..common.pattern import is_disabled
from ..common.validators import seps_surround


def size():
def size(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """

    def format_size(value):
        """Format size using uppercase and no space."""
        return re.sub(r'(?<=\d)[.](?=[^\d])', '', value.upper())

    rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'size'))
    rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk.defaults(name='size', validator=seps_surround)
    rebulk.regex(r'\d+\.?[mgt]b', r'\d+\.\d+[mgt]b', formatter=format_size, tags=['release-group-prefix'])
    rebulk.regex(r'\d+-?[mgt]b', r'\d+\.\d+-?[mgt]b', formatter=Size.fromstring, tags=['release-group-prefix'])

    return rebulk
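The observable change here: size values are now built by Size.fromstring instead of an uppercasing string formatter, so the API returns a quantity object rather than a plain string. A minimal sketch (hypothetical file name):

    from guessit import guessit

    guess = guessit('Movie.Name.720p.x264.700MB.mkv')
    print(guess['size'])        # 700MB (a guessit Size quantity, not a bare str)
    print(type(guess['size']))  # <class 'guessit.rules.common.quantity.Size'>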
235 libs/guessit/rules/properties/source.py Normal file

@@ -0,0 +1,235 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
source property
"""
import copy

from rebulk.remodule import re

from rebulk import AppendMatch, Rebulk, RemoveMatch, Rule

from .audio_codec import HqConflictRule
from ..common import dash, seps
from ..common.pattern import is_disabled
from ..common.validators import seps_before, seps_after, or_


def source(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'source'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash], private_parent=True, children=True)
    rebulk = rebulk.defaults(name='source',
                             tags=['video-codec-prefix', 'streaming_service.suffix'],
                             validate_all=True,
                             validator={'__parent__': or_(seps_before, seps_after)})

    rip_prefix = '(?P<other>Rip)-?'
    rip_suffix = '-?(?P<other>Rip)'
    rip_optional_suffix = '(?:' + rip_suffix + ')?'

    def build_source_pattern(*patterns, **kwargs):
        """Helper pattern to build source pattern."""
        prefix_format = kwargs.get('prefix') or ''
        suffix_format = kwargs.get('suffix') or ''

        string_format = prefix_format + '({0})' + suffix_format
        return [string_format.format(pattern) for pattern in patterns]

    def demote_other(match, other):  # pylint: disable=unused-argument
        """Default conflict solver with 'other' property."""
        return other if other.name == 'other' or other.name == 'release_group' else '__default__'

    rebulk.regex(*build_source_pattern('VHS', suffix=rip_optional_suffix),
                 value={'source': 'VHS', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('CAM', suffix=rip_optional_suffix),
                 value={'source': 'Camera', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('HD-?CAM', suffix=rip_optional_suffix),
                 value={'source': 'HD Camera', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('TELESYNC', 'TS', suffix=rip_optional_suffix),
                 value={'source': 'Telesync', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('HD-?TELESYNC', 'HD-?TS', suffix=rip_optional_suffix),
                 value={'source': 'HD Telesync', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('WORKPRINT', 'WP'), value='Workprint')
    rebulk.regex(*build_source_pattern('TELECINE', 'TC', suffix=rip_optional_suffix),
                 value={'source': 'Telecine', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('HD-?TELECINE', 'HD-?TC', suffix=rip_optional_suffix),
                 value={'source': 'HD Telecine', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('PPV', suffix=rip_optional_suffix),
                 value={'source': 'Pay-per-view', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('SD-?TV', suffix=rip_optional_suffix),
                 value={'source': 'TV', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('TV', suffix=rip_suffix),  # TV is too common to allow matching
                 value={'source': 'TV', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('TV', 'SD-?TV', prefix=rip_prefix),
                 value={'source': 'TV', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('TV-?(?=Dub)'), value='TV')
    rebulk.regex(*build_source_pattern('DVB', 'PD-?TV', suffix=rip_optional_suffix),
                 value={'source': 'Digital TV', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('DVD', suffix=rip_optional_suffix),
                 value={'source': 'DVD', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('DM', suffix=rip_optional_suffix),
                 value={'source': 'Digital Master', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('VIDEO-?TS', 'DVD-?R(?:$|(?!E))',  # 'DVD-?R(?:$|^E)' => DVD-Real ...
                                       'DVD-?9', 'DVD-?5'), value='DVD')

    rebulk.regex(*build_source_pattern('HD-?TV', suffix=rip_optional_suffix), conflict_solver=demote_other,
                 value={'source': 'HDTV', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('TV-?HD', suffix=rip_suffix), conflict_solver=demote_other,
                 value={'source': 'HDTV', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('TV', suffix='-?(?P<other>Rip-?HD)'), conflict_solver=demote_other,
                 value={'source': 'HDTV', 'other': 'Rip'})

    rebulk.regex(*build_source_pattern('VOD', suffix=rip_optional_suffix),
                 value={'source': 'Video on Demand', 'other': 'Rip'})

    rebulk.regex(*build_source_pattern('WEB', 'WEB-?DL', suffix=rip_suffix),
                 value={'source': 'Web', 'other': 'Rip'})
    # WEBCap is a synonym for WEBRip, mostly used by non-English speakers
    rebulk.regex(*build_source_pattern('WEB-?(?P<another>Cap)', suffix=rip_optional_suffix),
                 value={'source': 'Web', 'other': 'Rip', 'another': 'Rip'})
    rebulk.regex(*build_source_pattern('WEB-?DL', 'WEB-?U?HD', 'DL-?WEB', 'DL(?=-?Mux)'),
                 value={'source': 'Web'})
    rebulk.regex('(WEB)', value='Web', tags='weak.source')

    rebulk.regex(*build_source_pattern('HD-?DVD', suffix=rip_optional_suffix),
                 value={'source': 'HD-DVD', 'other': 'Rip'})

    rebulk.regex(*build_source_pattern('Blu-?ray', 'BD', 'BD[59]', 'BD25', 'BD50', suffix=rip_optional_suffix),
                 value={'source': 'Blu-ray', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('(?P<another>BR)-?(?=Scr(?:eener)?)', '(?P<another>BR)-?(?=Mux)'),  # BRRip
                 value={'source': 'Blu-ray', 'another': 'Reencoded'})
    rebulk.regex(*build_source_pattern('(?P<another>BR)', suffix=rip_suffix),  # BRRip
                 value={'source': 'Blu-ray', 'other': 'Rip', 'another': 'Reencoded'})

    rebulk.regex(*build_source_pattern('Ultra-?Blu-?ray', 'Blu-?ray-?Ultra'), value='Ultra HD Blu-ray')

    rebulk.regex(*build_source_pattern('AHDTV'), value='Analog HDTV')
    rebulk.regex(*build_source_pattern('UHD-?TV', suffix=rip_optional_suffix), conflict_solver=demote_other,
                 value={'source': 'Ultra HDTV', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('UHD', suffix=rip_suffix), conflict_solver=demote_other,
                 value={'source': 'Ultra HDTV', 'other': 'Rip'})

    rebulk.regex(*build_source_pattern('DSR', 'DTH', suffix=rip_optional_suffix),
                 value={'source': 'Satellite', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('DSR?', 'SAT', suffix=rip_suffix),
                 value={'source': 'Satellite', 'other': 'Rip'})

    rebulk.rules(ValidateSourcePrefixSuffix, ValidateWeakSource, UltraHdBlurayRule)

    return rebulk


class UltraHdBlurayRule(Rule):
    """
    Replace other:Ultra HD and source:Blu-ray with source:Ultra HD Blu-ray
    """
    dependency = HqConflictRule
    consequence = [RemoveMatch, AppendMatch]

    @classmethod
    def find_ultrahd(cls, matches, start, end, index):
        """Find Ultra HD match."""
        return matches.range(start, end, index=index, predicate=(
            lambda m: not m.private and m.name == 'other' and m.value == 'Ultra HD'
        ))

    @classmethod
    def validate_range(cls, matches, start, end):
        """Validate no holes or invalid matches exist in the specified range."""
        return (
            not matches.holes(start, end, predicate=lambda m: m.value.strip(seps)) and
            not matches.range(start, end, predicate=(
                lambda m: not m.private and (
                    m.name not in ('screen_size', 'color_depth') and (
                        m.name != 'other' or 'uhdbluray-neighbor' not in m.tags))))
        )

    def when(self, matches, context):
        to_remove = []
        to_append = []
        for filepart in matches.markers.named('path'):
            for match in matches.range(filepart.start, filepart.end, predicate=(
                    lambda m: not m.private and m.name == 'source' and m.value == 'Blu-ray')):
                other = self.find_ultrahd(matches, filepart.start, match.start, -1)
                if not other or not self.validate_range(matches, other.end, match.start):
                    other = self.find_ultrahd(matches, match.end, filepart.end, 0)
                    if not other or not self.validate_range(matches, match.end, other.start):
                        if not matches.range(filepart.start, filepart.end, predicate=(
                                lambda m: m.name == 'screen_size' and m.value == '2160p')):
                            continue

                if other:
                    other.private = True

                new_source = copy.copy(match)
                new_source.value = 'Ultra HD Blu-ray'
                to_remove.append(match)
                to_append.append(new_source)

        if to_remove or to_append:
            return to_remove, to_append
        return False
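A hedged illustration of what UltraHdBlurayRule does in practice; the file name is invented, and the result assumes 'UHD' produces the other: 'Ultra HD' match the rule looks for:

    from guessit import guessit

    # 'UHD' adjacent to 'BluRay' (with only screen-size/color-depth style
    # neighbors in between) is folded into a single source value
    print(guessit('Movie.Name.2016.UHD.BluRay.2160p.HEVC-GRP.mkv')['source'])  # Ultra HD Blu-ray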

class ValidateSourcePrefixSuffix(Rule):
    """
    Validate source with source prefix, source suffix.
    """
    priority = 64
    consequence = RemoveMatch

    def when(self, matches, context):
        ret = []
        for filepart in matches.markers.named('path'):
            for match in matches.range(filepart.start, filepart.end, predicate=lambda m: m.name == 'source'):
                match = match.initiator
                if not seps_before(match) and \
                        not matches.range(match.start - 1, match.start - 2,
                                          lambda m: 'source-prefix' in m.tags):
                    if match.children:
                        ret.extend(match.children)
                    ret.append(match)
                    continue
                if not seps_after(match) and \
                        not matches.range(match.end, match.end + 1,
                                          lambda m: 'source-suffix' in m.tags):
                    if match.children:
                        ret.extend(match.children)
                    ret.append(match)
                    continue

        return ret


class ValidateWeakSource(Rule):
    """
    Validate weak source
    """
    dependency = [ValidateSourcePrefixSuffix]
    priority = 64
    consequence = RemoveMatch

    def when(self, matches, context):
        ret = []
        for filepart in matches.markers.named('path'):
            for match in matches.range(filepart.start, filepart.end, predicate=lambda m: m.name == 'source'):
                # if there is more than one source in this filepart, just before the year and with holes for the title
                # most likely the source is part of the title
                if 'weak.source' in match.tags \
                        and matches.range(match.end, filepart.end, predicate=lambda m: m.name == 'source') \
                        and matches.holes(filepart.start, match.start,
                                          predicate=lambda m: m.value.strip(seps), index=-1):
                    if match.children:
                        ret.extend(match.children)
                    ret.append(match)
                    continue

        return ret
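Taken together, these patterns replace the old format property: the same release names now answer under the source key, usually paired with an other: 'Rip' companion match. A minimal sketch:

    from guessit import guessit

    guess = guessit('Show.Name.S01E01.720p.WEBRip.x264-GRP.mkv')
    print(guess['source'])  # Web
    print(guess['other'])   # Rip

    print(guessit('Show.Name.S01E01.1080p.BluRay.x264-GRP.mkv')['source'])  # Blu-ray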
libs/guessit/rules/properties/streaming_service.py

@@ -8,64 +8,30 @@ import re
from rebulk import Rebulk
from rebulk.rules import Rule, RemoveMatch

from ..common.pattern import is_disabled
from ...rules.common import seps, dash
from ...rules.common.validators import seps_before, seps_after


def streaming_service():
def streaming_service(config):  # pylint: disable=too-many-statements,unused-argument
    """Streaming service property.

    :param config: rule configuration
    :type config: dict
    :return:
    :rtype: Rebulk
    """
    rebulk = Rebulk().string_defaults(ignore_case=True).regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk.defaults(name='streaming_service', tags=['format-prefix'])
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'streaming_service'))
    rebulk = rebulk.string_defaults(ignore_case=True).regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk.defaults(name='streaming_service', tags=['source-prefix'])

    rebulk.string('AE', 'A&E', value='A&E')
    rebulk.string('AMBC', value='ABC')
    rebulk.string('AMC', value='AMC')
    rebulk.string('AMZN', 'AmazonPrime', value='Amazon Prime')
    rebulk.regex('Amazon-Prime', value='Amazon Prime')
    rebulk.string('AS', 'AdultSwim', value='Adult Swim')
    rebulk.regex('Adult-Swim', value='Adult Swim')
    rebulk.string('iP', 'BBCiPlayer', value='BBC iPlayer')
    rebulk.regex('BBC-iPlayer', value='BBC iPlayer')
    rebulk.string('CBS', value='CBS')
    rebulk.string('CC', 'ComedyCentral', value='Comedy Central')
    rebulk.regex('Comedy-Central', value='Comedy Central')
    rebulk.string('CR', 'CrunchyRoll', value='Crunchy Roll')
    rebulk.regex('Crunchy-Roll', value='Crunchy Roll')
    rebulk.string('CW', 'TheCW', value='The CW')
    rebulk.regex('The-CW', value='The CW')
    rebulk.string('DISC', 'Discovery', value='Discovery')
    rebulk.string('DIY', value='DIY Network')
    rebulk.string('DSNY', 'Disney', value='Disney')
    rebulk.string('EPIX', 'ePix', value='ePix')
    rebulk.string('HBO', 'HBOGo', value='HBO Go')
    rebulk.regex('HBO-Go', value='HBO Go')
    rebulk.string('HIST', 'History', value='History')
    rebulk.string('ID', value='Investigation Discovery')
    rebulk.string('IFC', 'IFC', value='IFC')
    rebulk.string('PBS', 'PBS', value='PBS')
    rebulk.string('NATG', 'NationalGeographic', value='National Geographic')
    rebulk.regex('National-Geographic', value='National Geographic')
    rebulk.string('NBA', 'NBATV', value='NBA TV')
    rebulk.regex('NBA-TV', value='NBA TV')
    rebulk.string('NBC', value='NBC')
    rebulk.string('NFL', value='NFL')
    rebulk.string('NICK', 'Nickelodeon', value='Nickelodeon')
    rebulk.string('NF', 'Netflix', value='Netflix')
    rebulk.string('iTunes', value='iTunes')
    rebulk.string('RTE', value='RTÉ One')
    rebulk.string('SESO', 'SeeSo', value='SeeSo')
    rebulk.string('SPKE', 'SpikeTV', 'Spike TV', value='Spike TV')
    rebulk.string('SYFY', 'Syfy', value='Syfy')
    rebulk.string('TFOU', 'TFou', value='TFou')
    rebulk.string('TLC', value='TLC')
    rebulk.string('TV3', value='TV3 Ireland')
    rebulk.string('TV4', value='TV4 Sweeden')
    rebulk.string('TVL', 'TVLand', 'TV Land', value='TV Land')
    rebulk.string('UFC', value='UFC')
    rebulk.string('USAN', value='USA Network')
    for value, items in config.items():
        patterns = items if isinstance(items, list) else [items]
        for pattern in patterns:
            if pattern.startswith('re:'):
                rebulk.regex(pattern, value=value)
            else:
                rebulk.string(pattern, value=value)

    rebulk.rules(ValidateStreamingService)

@@ -75,11 +41,11 @@ def streaming_service():
class ValidateStreamingService(Rule):
    """Validate streaming service matches."""

    priority = 32
    priority = 128
    consequence = RemoveMatch

    def when(self, matches, context):
        """Streaming service is always before format.
        """Streaming service is always before source.

        :param matches:
        :type matches: rebulk.match.Matches

@@ -93,16 +59,20 @@ class ValidateStreamingService(Rule):
            previous_match = matches.previous(service, lambda match: 'streaming_service.prefix' in match.tags, 0)
            has_other = service.initiator and service.initiator.children.named('other')

            if not has_other and \
                    (not next_match or matches.holes(service.end, next_match.start,
                                                     predicate=lambda match: match.value.strip(seps))) and \
                    (not previous_match or matches.holes(previous_match.end, service.start,
                                                         predicate=lambda match: match.value.strip(seps))):
                to_remove.append(service)
                continue
            if not has_other:
                if (not next_match or
                        matches.holes(service.end, next_match.start,
                                      predicate=lambda match: match.value.strip(seps)) or
                        not seps_before(service)):
                    if (not previous_match or
                            matches.holes(previous_match.end, service.start,
                                          predicate=lambda match: match.value.strip(seps)) or
                            not seps_after(service)):
                        to_remove.append(service)
                        continue

            if service.value == 'Comedy Central':
                # Current match is a valid streaming service, removing invalid closed caption (CC) matches
                to_remove.extend(matches.named('other', predicate=lambda match: match.value == 'CC'))
                # Current match is a valid streaming service, removing invalid Criterion Collection (CC) matches
                to_remove.extend(matches.named('edition', predicate=lambda match: match.value == 'Criterion'))

        return to_remove
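The hard-coded network list above is replaced by configuration: each key is the canonical service name, each value one pattern or a list of patterns, and a 're:' prefix switches a pattern from string to regex matching. From the API side nothing changes for names kept in the default config; a sketch:

    from guessit import guessit

    # 'AMZN' remains a configured string pattern for 'Amazon Prime'
    guess = guessit('Show.Name.S01E01.AMZN.WEB-DL.H.264-NTb.mkv')
    print(guess['streaming_service'])  # Amazon Prime
    print(guess['source'])             # Web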
libs/guessit/rules/properties/title.py

@@ -8,21 +8,31 @@ from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch, AppendTags
from rebulk.formatters import formatters

from .film import FilmTitleRule
from .language import SubtitlePrefixLanguageRule, SubtitleSuffixLanguageRule, SubtitleExtensionRule
from .language import (
    SubtitlePrefixLanguageRule,
    SubtitleSuffixLanguageRule,
    SubtitleExtensionRule,
    NON_SPECIFIC_LANGUAGES
)
from ..common import seps, title_seps
from ..common.comparators import marker_sorted
from ..common.expected import build_expected_function
from ..common.formatters import cleanup, reorder_title
from ..common.pattern import is_disabled
from ..common.validators import seps_surround


def title():
def title(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk().rules(TitleFromPosition, PreferTitleWithYear)
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'title'))
    rebulk.rules(TitleFromPosition, PreferTitleWithYear)

    expected_title = build_expected_function('expected_title')

@@ -83,18 +93,25 @@ class TitleBaseRule(Rule):
        :rtype:
        """
        cropped_holes = []
        group_markers = matches.markers.named('group')
        for group_marker in group_markers:
            path_marker = matches.markers.at_match(group_marker, predicate=lambda m: m.name == 'path', index=0)
            if path_marker and path_marker.span == group_marker.span:
                group_markers.remove(group_marker)

        for hole in holes:
            group_markers = matches.markers.named('group')
            cropped_holes.extend(hole.crop(group_markers))

        return cropped_holes

    def is_ignored(self, match):
    @staticmethod
    def is_ignored(match):
        """
        Ignore matches when scanning for title (hole).

        Full word language and countries won't be ignored if they are uppercase.
        """
        return not (len(match) > 3 and match.raw.isupper()) and match.name in ['language', 'country', 'episode_details']
        return not (len(match) > 3 and match.raw.isupper()) and match.name in ('language', 'country', 'episode_details')

    def should_keep(self, match, to_keep, matches, filepart, hole, starting):
        """

@@ -114,7 +131,7 @@ class TitleBaseRule(Rule):
        :return:
        :rtype:
        """
        if match.name in ['language', 'country']:
        if match.name in ('language', 'country'):
            # Keep language if exactly matching the hole.
            if len(hole.value) == len(match.raw):
                return True

@@ -125,9 +142,10 @@ class TitleBaseRule(Rule):
            for outside in outside_matches:
                other_languages.extend(matches.range(outside.start, outside.end,
                                                     lambda c_match: c_match.name == match.name and
                                                     c_match not in to_keep))
                                                     c_match not in to_keep and
                                                     c_match.value not in NON_SPECIFIC_LANGUAGES))

            if not other_languages:
            if not other_languages and (not starting or len(match.raw) <= 3):
                return True

        return False

@@ -145,7 +163,7 @@ class TitleBaseRule(Rule):
            return match.start >= hole.start and match.end <= hole.end
        return True

    def check_titles_in_filepart(self, filepart, matches, context):
    def check_titles_in_filepart(self, filepart, matches, context):  # pylint:disable=inconsistent-return-statements
        """
        Find title in filepart (ignoring language)
        """

@@ -154,12 +172,11 @@ class TitleBaseRule(Rule):

        holes = matches.holes(start, end + 1, formatter=formatters(cleanup, reorder_title),
                              ignore=self.is_ignored,
                              predicate=lambda hole: hole.value)
                              predicate=lambda m: m.value)

        holes = self.holes_process(holes, matches)

        for hole in holes:
            # pylint:disable=cell-var-from-loop
            if not hole or (self.hole_filter and not self.hole_filter(hole, matches)):
                continue

@@ -170,8 +187,8 @@ class TitleBaseRule(Rule):

            if ignored_matches:
                for ignored_match in reversed(ignored_matches):
                    # pylint:disable=undefined-loop-variable
                    trailing = matches.chain_before(hole.end, seps, predicate=lambda match: match == ignored_match)
                    # pylint:disable=undefined-loop-variable, cell-var-from-loop
                    trailing = matches.chain_before(hole.end, seps, predicate=lambda m: m == ignored_match)
                    if trailing:
                        should_keep = self.should_keep(ignored_match, to_keep, matches, filepart, hole, False)
                        if should_keep:

@@ -188,7 +205,7 @@ class TitleBaseRule(Rule):
                for ignored_match in ignored_matches:
                    if ignored_match not in to_keep:
                        starting = matches.chain_after(hole.start, seps,
                                                       predicate=lambda match: match == ignored_match)
                                                       predicate=lambda m: m == ignored_match)
                        if starting:
                            should_keep = self.should_keep(ignored_match, to_keep, matches, filepart, hole, True)
                            if should_keep:

@@ -214,7 +231,7 @@ class TitleBaseRule(Rule):
            hole.tags = self.match_tags
            if self.alternative_match_name:
                # Split and keep values that can be a title
                titles = hole.split(title_seps, lambda match: match.value)
                titles = hole.split(title_seps, lambda m: m.value)
                for title_match in list(titles[1:]):
                    previous_title = titles[titles.index(title_match) - 1]
                    separator = matches.input_string[previous_title.end:title_match.start]

@@ -231,14 +248,15 @@ class TitleBaseRule(Rule):
        return titles, to_remove

    def when(self, matches, context):
        ret = []
        to_remove = []

        if matches.named(self.match_name, lambda match: 'expected' in match.tags):
            return
            return False

        fileparts = [filepart for filepart in list(marker_sorted(matches.markers.named('path'), matches))
                     if not self.filepart_filter or self.filepart_filter(filepart, matches)]

        to_remove = []

|
||||
years_fileparts = []
|
||||
for filepart in fileparts:
|
||||
|
@ -246,7 +264,6 @@ class TitleBaseRule(Rule):
|
|||
if year_match:
|
||||
years_fileparts.append(filepart)
|
||||
|
||||
ret = []
|
||||
for filepart in fileparts:
|
||||
try:
|
||||
years_fileparts.remove(filepart)
|
||||
|
@ -268,7 +285,9 @@ class TitleBaseRule(Rule):
|
|||
ret.extend(titles)
|
||||
to_remove.extend(to_remove_c)
|
||||
|
||||
return ret, to_remove
|
||||
if ret or to_remove:
|
||||
return ret, to_remove
|
||||
return False
|
||||
|
||||
|
||||
class TitleFromPosition(TitleBaseRule):
|
||||
|
@ -282,6 +301,9 @@ class TitleFromPosition(TitleBaseRule):
|
|||
def __init__(self):
|
||||
super(TitleFromPosition, self).__init__('title', ['title'], 'alternative_title')
|
||||
|
||||
def enabled(self, context):
|
||||
return not is_disabled(context, 'alternative_title')
|
||||
|
||||
|
||||
class PreferTitleWithYear(Rule):
|
||||
"""
|
||||
|
@ -302,7 +324,7 @@ class PreferTitleWithYear(Rule):
|
|||
if filepart:
|
||||
year_match = matches.range(filepart.start, filepart.end, lambda match: match.name == 'year', 0)
|
||||
if year_match:
|
||||
group = matches.markers.at_match(year_match, lambda group: group.name == 'group')
|
||||
group = matches.markers.at_match(year_match, lambda m: m.name == 'group')
|
||||
if group:
|
||||
with_year_in_group.append(title_match)
|
||||
else:
|
||||
|
@ -310,16 +332,18 @@ class PreferTitleWithYear(Rule):
|
|||
|
||||
to_tag = []
|
||||
if with_year_in_group:
|
||||
title_values = set([title_match.value for title_match in with_year_in_group])
|
||||
title_values = {title_match.value for title_match in with_year_in_group}
|
||||
to_tag.extend(with_year_in_group)
|
||||
elif with_year:
|
||||
title_values = set([title_match.value for title_match in with_year])
|
||||
title_values = {title_match.value for title_match in with_year}
|
||||
to_tag.extend(with_year)
|
||||
else:
|
||||
title_values = set([title_match.value for title_match in titles])
|
||||
title_values = {title_match.value for title_match in titles}
|
||||
|
||||
to_remove = []
|
||||
for title_match in titles:
|
||||
if title_match.value not in title_values:
|
||||
to_remove.append(title_match)
|
||||
return to_remove, to_tag
|
||||
if to_remove or to_tag:
|
||||
return to_remove, to_tag
|
||||
return False
|
||||
|
|
|
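The renamed lambda parameters and set-literal cleanups are behavior-neutral; the visible contract of the title rules is unchanged. A minimal sketch of the two usual paths, positional detection and an explicit expected_title option:

    from guessit import guessit

    # TitleFromPosition: the first hole in the filepart becomes the title
    print(guessit('Dexter.S05E02.720p.HDTV.x264-IMMERSE.mkv')['title'])  # Dexter

    # An expected title bypasses the positional rules entirely
    print(guessit('dexter.s05e02.mkv', {'expected_title': ['Dexter']})['title'])  # Dexter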
libs/guessit/rules/properties/type.py

@@ -6,6 +6,7 @@ type property
from rebulk import CustomRule, Rebulk, POST_PROCESS
from rebulk.match import Match

from ..common.pattern import is_disabled
from ...rules.processors import Processors


@@ -19,13 +20,19 @@ def _type(matches, value):
    matches.append(Match(len(matches.input_string), len(matches.input_string), name='type', value=value))


def type_():
def type_(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    return Rebulk().rules(TypeProcessor)
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'type'))
    rebulk = rebulk.rules(TypeProcessor)

    return rebulk


class TypeProcessor(CustomRule):

@@ -45,9 +52,10 @@ class TypeProcessor(CustomRule):

        episode = matches.named('episode')
        season = matches.named('season')
        absolute_episode = matches.named('absolute_episode')
        episode_details = matches.named('episode_details')

        if episode or season or episode_details:
        if episode or season or episode_details or absolute_episode:
            return 'episode'

        film = matches.named('film')
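The practical effect of adding absolute_episode here: anime-style releases that carry only an absolute number (no season/episode pair) are still typed as episodes. A sketch with invented file names:

    from guessit import guessit

    print(guessit('Movie.Name.2018.1080p.BluRay.x264-GRP.mkv')['type'])  # movie
    print(guessit('[Group] Show Name - 123 [720p].mkv')['type'])         # episode, via absolute_episode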
libs/guessit/rules/properties/video_codec.py

@@ -3,47 +3,76 @@
"""
video_codec and video_profile property
"""
from rebulk import Rebulk, Rule, RemoveMatch
from rebulk.remodule import re

from rebulk import Rebulk, Rule, RemoveMatch

from ..common import dash
from ..common.pattern import is_disabled
from ..common.validators import seps_after, seps_before, seps_surround


def video_codec():
def video_codec(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
    rebulk.defaults(name="video_codec", tags=['format-suffix', 'streaming_service.suffix'])
    rebulk = Rebulk()
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
    rebulk.defaults(name="video_codec",
                    tags=['source-suffix', 'streaming_service.suffix'],
                    disabled=lambda context: is_disabled(context, 'video_codec'))

    rebulk.regex(r"Rv\d{2}", value="Real")
    rebulk.regex("Mpeg2", value="Mpeg2")
    rebulk.regex("DVDivX", "DivX", value="DivX")
    rebulk.regex("XviD", value="XviD")
    rebulk.regex("[hx]-?264(?:-?AVC(HD)?)?", "MPEG-?4(?:-?AVC(HD)?)", "AVC(?:HD)?", value="h264")
    rebulk.regex("[hx]-?265(?:-?HEVC)?", "HEVC", value="h265")
    rebulk.regex('(?P<video_codec>hevc)(?P<video_profile>10)', value={'video_codec': 'h265', 'video_profile': '10bit'},
    rebulk.regex(r'Rv\d{2}', value='RealVideo')
    rebulk.regex('Mpe?g-?2', '[hx]-?262', value='MPEG-2')
    rebulk.string("DVDivX", "DivX", value="DivX")
    rebulk.string('XviD', value='Xvid')
    rebulk.regex('VC-?1', value='VC-1')
    rebulk.string('VP7', value='VP7')
    rebulk.string('VP8', 'VP80', value='VP8')
    rebulk.string('VP9', value='VP9')
    rebulk.regex('[hx]-?263', value='H.263')
    rebulk.regex('[hx]-?264', '(MPEG-?4)?AVC(?:HD)?', value='H.264')
    rebulk.regex('[hx]-?265', 'HEVC', value='H.265')
    rebulk.regex('(?P<video_codec>hevc)(?P<color_depth>10)', value={'video_codec': 'H.265', 'color_depth': '10-bit'},
                 tags=['video-codec-suffix'], children=True)

    # http://blog.mediacoderhq.com/h264-profiles-and-levels/
    # http://fr.wikipedia.org/wiki/H.264
    rebulk.defaults(name="video_profile", validator=seps_surround)
    # https://en.wikipedia.org/wiki/H.264/MPEG-4_AVC
    rebulk.defaults(clear=True,
                    name="video_profile",
                    validator=seps_surround,
                    disabled=lambda context: is_disabled(context, 'video_profile'))

    rebulk.regex('10.?bits?', 'Hi10P?', 'YUV420P10', value='10bit')
    rebulk.regex('8.?bits?', value='8bit')
    rebulk.string('BP', value='Baseline', tags='video_profile.rule')
    rebulk.string('XP', 'EP', value='Extended', tags='video_profile.rule')
    rebulk.string('MP', value='Main', tags='video_profile.rule')
    rebulk.string('HP', 'HiP', value='High', tags='video_profile.rule')

    rebulk.string('BP', value='BP', tags='video_profile.rule')
    rebulk.string('XP', 'EP', value='XP', tags='video_profile.rule')
    rebulk.string('MP', value='MP', tags='video_profile.rule')
    rebulk.string('HP', 'HiP', value='HP', tags='video_profile.rule')
    rebulk.regex('Hi422P', value='Hi422P', tags='video_profile.rule')
    rebulk.regex('Hi444PP', value='Hi444PP', tags='video_profile.rule')
    # https://en.wikipedia.org/wiki/Scalable_Video_Coding
    rebulk.string('SC', 'SVC', value='Scalable Video Coding', tags='video_profile.rule')
    # https://en.wikipedia.org/wiki/AVCHD
    rebulk.regex('AVC(?:HD)?', value='Advanced Video Codec High Definition', tags='video_profile.rule')
    # https://en.wikipedia.org/wiki/H.265/HEVC
    rebulk.string('HEVC', value='High Efficiency Video Coding', tags='video_profile.rule')

    rebulk.string('DXVA', value='DXVA', name='video_api')
    rebulk.regex('Hi422P', value='High 4:2:2')
    rebulk.regex('Hi444PP', value='High 4:4:4 Predictive')
    rebulk.regex('Hi10P?', value='High 10')  # no profile validation is required

    rebulk.string('DXVA', value='DXVA', name='video_api',
                  disabled=lambda context: is_disabled(context, 'video_api'))

    rebulk.defaults(clear=True,
                    name='color_depth',
                    validator=seps_surround,
                    disabled=lambda context: is_disabled(context, 'color_depth'))
    rebulk.regex('12.?bits?', value='12-bit')
    rebulk.regex('10.?bits?', 'YUV420P10', 'Hi10P?', value='10-bit')
    rebulk.regex('8.?bits?', value='8-bit')

    rebulk.rules(ValidateVideoCodec, VideoProfileRule)

@@ -52,11 +81,14 @@ def video_codec():

class ValidateVideoCodec(Rule):
    """
    Validate video_codec with format property or separated
    Validate video_codec with source property or separated
    """
    priority = 64
    consequence = RemoveMatch

    def enabled(self, context):
        return not is_disabled(context, 'video_codec')

    def when(self, matches, context):
        ret = []
        for codec in matches.named('video_codec'):

@@ -77,11 +109,16 @@ class VideoProfileRule(Rule):
    """
    consequence = RemoveMatch

    def enabled(self, context):
        return not is_disabled(context, 'video_profile')

    def when(self, matches, context):
        profile_list = matches.named('video_profile', lambda match: 'video_profile.rule' in match.tags)
        ret = []
        for profile in profile_list:
            codec = matches.previous(profile, lambda match: match.name == 'video_codec')
            codec = matches.at_span(profile.span, lambda match: match.name == 'video_codec', 0)
            if not codec:
                codec = matches.previous(profile, lambda match: match.name == 'video_codec')
            if not codec:
                codec = matches.next(profile, lambda match: match.name == 'video_codec')
            if not codec:
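Two user-visible renames happen here: codec values move to standard notation (h264 -> H.264, h265 -> H.265, Mpeg2 -> MPEG-2) and bit depth moves out of video_profile into the new color_depth property. A minimal sketch:

    from guessit import guessit

    print(guessit('Show.S01E01.1080p.WEB-DL.x264-GRP.mkv')['video_codec'])  # H.264

    guess = guessit('[Group] Show - 01 [HEVC10][1080p].mkv')
    print(guess['video_codec'])  # H.265
    print(guess['color_depth'])  # 10-bit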
libs/guessit/rules/properties/website.py

@@ -9,28 +9,35 @@ from rebulk.remodule import re
from rebulk import Rebulk, Rule, RemoveMatch
from ..common import seps
from ..common.formatters import cleanup
from ..common.pattern import is_disabled
from ..common.validators import seps_surround
from ...reutils import build_or_pattern


def website():
def website(config):
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'website'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
    rebulk.defaults(name="website")

    tlds = [l.strip().decode('utf-8')
            for l in resource_stream('guessit', 'tlds-alpha-by-domain.txt').readlines()
            if b'--' not in l][1:]  # All registered domain extension
    with resource_stream('guessit', 'tlds-alpha-by-domain.txt') as tld_file:
        tlds = [
            tld.strip().decode('utf-8')
            for tld in tld_file.readlines()
            if b'--' not in tld
        ][1:]  # All registered domain extension

    safe_tlds = ['com', 'org', 'net']  # For sure a website extension
    safe_subdomains = ['www']  # For sure a website subdomain
    safe_prefix = ['co', 'com', 'org', 'net']  # Those words before a tlds are sure

    website_prefixes = ['from']
    safe_tlds = config['safe_tlds']  # For sure a website extension
    safe_subdomains = config['safe_subdomains']  # For sure a website subdomain
    safe_prefix = config['safe_prefixes']  # Those words before a tlds are sure
    website_prefixes = config['prefixes']

    rebulk.regex(r'(?:[^a-z0-9]|^)((?:'+build_or_pattern(safe_subdomains) +
                 r'\.)+(?:[a-z-]+\.)+(?:'+build_or_pattern(tlds) +

@@ -60,7 +67,7 @@ def website():
            """
            Validator for next website matches
            """
            return any(name in ['season', 'episode', 'year'] for name in match.names)
            return match.named('season', 'episode', 'year')

        def when(self, matches, context):
            to_remove = []

@@ -73,7 +80,9 @@ def website():
                if not safe:
                    suffix = matches.next(website_match, PreferTitleOverWebsite.valid_followers, 0)
                    if suffix:
                        to_remove.append(website_match)
                        group = matches.markers.at_match(website_match, lambda marker: marker.name == 'group', 0)
                        if not group:
                            to_remove.append(website_match)
            return to_remove

    rebulk.rules(PreferTitleOverWebsite, ValidateWebsitePrefix)
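The TLD list is still loaded from the bundled tlds-alpha-by-domain.txt, now via a context manager, while the safe subdomain/TLD/prefix lists come from configuration. A rough sketch of the resulting behavior (hypothetical file name, assuming the default config keeps 'www' and 'com' in the safe lists):

    from guessit import guessit

    guess = guessit('www.example.com - Show.Name.S01E01.720p.HDTV.x264-GRP.mkv')
    print(guess['website'])  # www.example.com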
335 libs/guessit/test/enable_disable_properties.yml Normal file

@@ -0,0 +1,335 @@
? vorbis
: options: --exclude audio_codec
  -audio_codec: Vorbis

? DTS-ES
: options: --exclude audio_profile
  audio_codec: DTS
  -audio_profile: Extended Surround

? DTS.ES
: options: --include audio_codec
  audio_codec: DTS
  -audio_profile: Extended Surround

? 5.1
? 5ch
? 6ch
: options: --exclude audio_channels
  -audio_channels: '5.1'

? Movie Title-x01-Other Title.mkv
? Movie Title-x01-Other Title
? directory/Movie Title-x01-Other Title/file.mkv
: options: --exclude bonus
  -bonus: 1
  -bonus_title: Other Title

? Title-x02-Bonus Title.mkv
: options: --include bonus
  bonus: 2
  -bonus_title: Other Title

? cd 1of3
: options: --exclude cd
  -cd: 1
  -cd_count: 3

? This.is.Us
: options: --exclude country
  title: This is Us
  -country: US

? 2015.01.31
: options: --exclude date
  year: 2015
  -date: 2015-01-31

? Something 2 mar 2013)
: options: --exclude date
  -date: 2013-03-02

? 2012 2009 S01E02 2015  # If no year is marked, the second one is guessed.
: options: --exclude year
  -year: 2009

? Director's cut
: options: --exclude edition
  -edition: Director's Cut

? 2x5
? 2X5
? 02x05
? 2X05
? 02x5
? S02E05
? s02e05
? s02e5
? s2e05
? s02ep05
? s2EP5
: options: --exclude season
  -season: 2
  -episode: 5

? 2x6
? 2X6
? 02x06
? 2X06
? 02x6
? S02E06
? s02e06
? s02e6
? s2e06
? s02ep06
? s2EP6
: options: --exclude episode
  -season: 2
  -episode: 6

? serie Season 2 other
: options: --exclude season
  -season: 2

? Some Dummy Directory/S02 Some Series/E01-Episode title.mkv
: options: --exclude episode_title
  -episode_title: Episode title
  season: 2
  episode: 1

? Another Dummy Directory/S02 Some Series/E01-Episode title.mkv
: options: --include season --include episode
  -episode_title: Episode title
  season: 2
  episode: 1

# pattern contains season and episode: it wont work enabling only one
|
||||
? Some Series S03E01E02
|
||||
: options: --include episode
|
||||
-season: 3
|
||||
-episode: [1, 2]
|
||||
|
||||
# pattern contains season and episode: it wont work enabling only one
|
||||
? Another Series S04E01E02
|
||||
: options: --include season
|
||||
-season: 4
|
||||
-episode: [1, 2]
|
||||
|
||||
? Show.Name.Season.4.Episode.1
|
||||
: options: --include episode
|
||||
-season: 4
|
||||
episode: 1
|
||||
|
||||
? Another.Show.Name.Season.4.Episode.1
|
||||
: options: --include season
|
||||
season: 4
|
||||
-episode: 1
|
||||
|
||||
? Some Series S01 02 03
|
||||
: options: --exclude season
|
||||
-season: [1, 2, 3]
|
||||
|
||||
? Some Series E01 02 04
|
||||
: options: --exclude episode
|
||||
-episode: [1, 2, 4]
|
||||
|
||||
? A very special episode s06 special
|
||||
: options: -t episode --exclude episode_details
|
||||
season: 6
|
||||
-episode_details: Special
|
||||
|
||||
? S01D02.3-5-GROUP
|
||||
: options: --exclude disc
|
||||
-season: 1
|
||||
-disc: [2, 3, 4, 5]
|
||||
-episode: [2, 3, 4, 5]
|
||||
|
||||
? S01D02&4-6&8
|
||||
: options: --exclude season
|
||||
-season: 1
|
||||
-disc: [2, 4, 5, 6, 8]
|
||||
-episode: [2, 4, 5, 6, 8]
|
||||
|
||||
? Film Title-f01-Series Title.mkv
|
||||
: options: --exclude film
|
||||
-film: 1
|
||||
-film_title: Film Title
|
||||
|
||||
? Another Film Title-f01-Series Title.mkv
|
||||
: options: --exclude film_title
|
||||
film: 1
|
||||
-film_title: Film Title
|
||||
|
||||
? English
|
||||
? .ENG.
|
||||
: options: --exclude language
|
||||
-language: English
|
||||
|
||||
? SubFrench
|
||||
? SubFr
|
||||
? STFr
|
||||
: options: --exclude subtitle_language
|
||||
-language: French
|
||||
-subtitle_language: French
|
||||
|
||||
? ST.FR
|
||||
: options: --exclude subtitle_language
|
||||
language: French
|
||||
-subtitle_language: French
|
||||
|
||||
? ENG.-.sub.FR
|
||||
? ENG.-.FR Sub
|
||||
: options: --include language
|
||||
language: [English, French]
|
||||
-subtitle_language: French
|
||||
|
||||
? ENG.-.SubFR
|
||||
: options: --include language
|
||||
language: English
|
||||
-subtitle_language: French
|
||||
|
||||
? ENG.-.FRSUB
|
||||
? ENG.-.FRSUBS
|
||||
? ENG.-.FR-SUBS
|
||||
: options: --include subtitle_language
|
||||
-language: English
|
||||
subtitle_language: French
|
||||
|
||||
? DVD.Real.XViD
|
||||
? DVD.fix.XViD
|
||||
: options: --exclude other
|
||||
-other: Fix
|
||||
-proper_count: 1
|
||||
|
||||
? Part 3
|
||||
? Part III
|
||||
? Part Three
|
||||
? Part Trois
|
||||
? Part3
|
||||
: options: --exclude part
|
||||
-part: 3
|
||||
|
||||
? Some.Title.XViD-by.Artik[SEDG].avi
|
||||
: options: --exclude release_group
|
||||
-release_group: Artik[SEDG]
|
||||
|
||||
? "[ABC] Some.Title.avi"
|
||||
? some/folder/[ABC]Some.Title.avi
|
||||
: options: --exclude release_group
|
||||
-release_group: ABC
|
||||
|
||||
? 360p
|
||||
? 360px
|
||||
? "360"
|
||||
? +500x360
|
||||
: options: --exclude screen_size
|
||||
-screen_size: 360p
|
||||
|
||||
? 640x360
|
||||
: options: --exclude aspect_ratio
|
||||
screen_size: 360p
|
||||
-aspect_ratio: 1.778
|
||||
|
||||
? 8196x4320
|
||||
: options: --exclude screen_size
|
||||
-screen_size: 4320p
|
||||
-aspect_ratio: 1.897
|
||||
|
||||
? 4.3gb
|
||||
: options: --exclude size
|
||||
-size: 4.3GB
|
||||
|
||||
? VhS_rip
|
||||
? VHS.RIP
|
||||
: options: --exclude source
|
||||
-source: VHS
|
||||
-other: Rip
|
||||
|
||||
? DVD.RIP
|
||||
: options: --include other
|
||||
-source: DVD
|
||||
-other: Rip
|
||||
|
||||
? Title Only.avi
|
||||
: options: --exclude title
|
||||
-title: Title Only
|
||||
|
||||
? h265
|
||||
? x265
|
||||
? h.265
|
||||
? x.265
|
||||
? hevc
|
||||
: options: --exclude video_codec
|
||||
-video_codec: H.265
|
||||
|
||||
? hevc10
|
||||
: options: --include color_depth
|
||||
-video_codec: H.265
|
||||
-color_depth: 10-bit
|
||||
|
||||
? HEVC-YUV420P10
|
||||
: options: --include color_depth
|
||||
-video_codec: H.265
|
||||
color_depth: 10-bit
|
||||
|
||||
? h265-HP
|
||||
: options: --exclude video_profile
|
||||
video_codec: H.265
|
||||
-video_profile: High
|
||||
|
||||
? House.of.Cards.2013.S02E03.1080p.NF.WEBRip.DD5.1.x264-NTb.mkv
|
||||
? House.of.Cards.2013.S02E03.1080p.Netflix.WEBRip.DD5.1.x264-NTb.mkv
|
||||
: options: --exclude streaming_service
|
||||
-streaming_service: Netflix
|
||||
|
||||
? wawa.co.uk
|
||||
: options: --exclude website
|
||||
-website: wawa.co.uk
|
||||
|
||||
? movie.mp4
|
||||
: options: --exclude mimetype
|
||||
-mimetype: video/mp4
|
||||
|
||||
? another movie.mkv
|
||||
: options: --exclude container
|
||||
-container: mkv
|
||||
|
||||
? series s02e01
|
||||
: options: --exclude type
|
||||
-type: episode
|
||||
|
||||
? series s02e01
|
||||
: options: --exclude type
|
||||
-type: episode
|
||||
|
||||
? Hotel.Hell.S01E01.720p.DD5.1.448kbps-ALANiS
|
||||
: options: --exclude audio_bit_rate
|
||||
-audio_bit_rate: 448Kbps
|
||||
|
||||
? Katy Perry - Pepsi & Billboard Summer Beats Concert Series 2012 1080i HDTV 20 Mbps DD2.0 MPEG2-TrollHD.ts
|
||||
: options: --exclude video_bit_rate
|
||||
-video_bit_rate: 20Mbps
|
||||
|
||||
? "[Figmentos] Monster 34 - At the End of Darkness [781219F1].mkv"
|
||||
: options: --exclude crc32
|
||||
-crc32: 781219F1
|
||||
|
||||
? 1080p25
|
||||
: options: --exclude frame_rate
|
||||
screen_size: 1080p
|
||||
-frame_rate: 25fps
|
||||
|
||||
? 1080p25
|
||||
: options: --exclude screen_size
|
||||
-screen_size: 1080p
|
||||
-frame_rate: 25fps
|
||||
|
||||
? 1080p25
|
||||
: options: --include frame_rate
|
||||
-screen_size: 1080p
|
||||
-frame_rate: 25fps
|
||||
|
||||
? 1080p 30fps
|
||||
: options: --exclude screen_size
|
||||
-screen_size: 1080p
|
||||
frame_rate: 30fps
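The options keys in the fixtures above are plain CLI option strings, and a leading - on an expected property asserts its absence. A minimal sketch of driving the same switches through the Python API, assuming guessit() accepts CLI-style option strings the way this test data does:

from guessit import guessit

# Default behaviour: the trailing token is detected as a country.
print(guessit('This.is.Us'))

# With the property excluded, the token stays in the title,
# matching the '--exclude country' fixture above.
print(guessit('This.is.Us', '--exclude country'))
# -> {'title': 'This is Us', 'type': 'movie'}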
File diff suppressed because it is too large
@ -8,22 +8,25 @@
? +lame3.100
: audio_codec: MP3

? +MP2
: audio_codec: MP2

? +DolbyDigital
? +DD
? +Dolby Digital
? +AC3
: audio_codec: AC3
: audio_codec: Dolby Digital

? +DDP
? +DD+
? +EAC3
: audio_codec: EAC3
: audio_codec: Dolby Digital Plus

? +DolbyAtmos
? +Dolby Atmos
? +Atmos
? -Atmosphere
: audio_codec: DolbyAtmos
: audio_codec: Dolby Atmos

? +AAC
: audio_codec: AAC

@ -36,33 +39,34 @@

? +True-HD
? +trueHD
: audio_codec: TrueHD
: audio_codec: Dolby TrueHD

? +True-HD51
? +trueHD51
: audio_codec: TrueHD
: audio_codec: Dolby TrueHD
  audio_channels: '5.1'


? +DTSHD
? +DTS HD
? +DTS-HD
: audio_codec: DTS
  audio_profile: HD
: audio_codec: DTS-HD

? +DTS-HDma
: audio_codec: DTS
  audio_profile: HDMA
? +DTSMA
: audio_codec: DTS-HD
  audio_profile: Master Audio

? +AC3-hq
: audio_codec: AC3
  audio_profile: HQ
: audio_codec: Dolby Digital
  audio_profile: High Quality

? +AAC-HE
: audio_codec: AAC
  audio_profile: HE
  audio_profile: High Efficiency

? +AAC-LC
: audio_codec: AAC
  audio_profile: LC
  audio_profile: Low Complexity

? +AAC2.0
? +AAC20

@ -90,8 +94,41 @@

? DD5.1
? DD51
: audio_codec: AC3
: audio_codec: Dolby Digital
  audio_channels: '5.1'

? -51
: audio_channels: '5.1'

? DTS-HD.HRA
? DTSHD.HRA
? DTS-HD.HR
? DTSHD.HR
? -HRA
? -HR
: audio_codec: DTS-HD
  audio_profile: High Resolution Audio

? DTSES
? DTS-ES
? -ES
: audio_codec: DTS
  audio_profile: Extended Surround

? DD-EX
? DDEX
? -EX
: audio_codec: Dolby Digital
  audio_profile: EX

? OPUS
: audio_codec: Opus

? Vorbis
: audio_codec: Vorbis

? PCM
: audio_codec: PCM

? LPCM
: audio_codec: LPCM
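The net effect of this hunk is that GuessIt 3 reports audio codecs and profiles under their full marketing names: AC3 becomes Dolby Digital, DD+/EAC3 becomes Dolby Digital Plus, and the DTS-HD profiles are spelled out. A minimal sketch of what a caller sees after the upgrade (the release name is invented for illustration):

from guessit import guessit

info = guessit('Movie.Title.2018.1080p.DDP5.1.x264-GRP.mkv')
assert info['audio_codec'] == 'Dolby Digital Plus'  # GuessIt 2 said 'EAC3'
assert info['audio_channels'] == '5.1'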
@ -7,4 +7,4 @@
? Some.Title-DVDRIP-x264-CDP
: cd: !!null
  release_group: CDP
  video_codec: h264
  video_codec: H.264
467 libs/guessit/test/rules/common_words.yml Normal file

@ -0,0 +1,467 @@
? is
: title: is

? it
: title: it

? am
: title: am

? mad
: title: mad

? men
: title: men

? man
: title: man

? run
: title: run

? sin
: title: sin

? st
: title: st

? to
: title: to

? 'no'
: title: 'no'

? non
: title: non

? war
: title: war

? min
: title: min

? new
: title: new

? car
: title: car

? day
: title: day

? bad
: title: bad

? bat
: title: bat

? fan
: title: fan

? fry
: title: fry

? cop
: title: cop

? zen
: title: zen

? gay
: title: gay

? fat
: title: fat

? one
: title: one

? cherokee
: title: cherokee

? got
: title: got

? an
: title: an

? as
: title: as

? cat
: title: cat

? her
: title: her

? be
: title: be

? hat
: title: hat

? sun
: title: sun

? may
: title: may

? my
: title: my

? mr
: title: mr

? rum
: title: rum

? pi
: title: pi

? bb
: title: bb

? bt
: title: bt

? tv
: title: tv

? aw
: title: aw

? by
: title: by

? md
: other: Mic Dubbed

? mp
: title: mp

? cd
: title: cd

? in
: title: in

? ad
: title: ad

? ice
: title: ice

? ay
: title: ay

? at
: title: at

? star
: title: star

? so
: title: so

? he
: title: he

? do
: title: do

? ax
: title: ax

? mx
: title: mx

? bas
: title: bas

? de
: title: de

? le
: title: le

? son
: title: son

? ne
: title: ne

? ca
: title: ca

? ce
: title: ce

? et
: title: et

? que
: title: que

? mal
: title: mal

? est
: title: est

? vol
: title: vol

? or
: title: or

? mon
: title: mon

? se
: title: se

? je
: title: je

? tu
: title: tu

? me
: title: me

? ma
: title: ma

? va
: title: va

? au
: country: AU

? lu
: title: lu

? wa
: title: wa

? ga
: title: ga

? ao
: title: ao

? la
: title: la

? el
: title: el

? del
: title: del

? por
: title: por

? mar
: title: mar

? al
: title: al

? un
: title: un

? ind
: title: ind

? arw
: title: arw

? ts
: source: Telesync

? ii
: title: ii

? bin
: title: bin

? chan
: title: chan

? ss
: title: ss

? san
: title: san

? oss
: title: oss

? iii
: title: iii

? vi
: title: vi

? ben
: title: ben

? da
: title: da

? lt
: title: lt

? ch
: title: ch

? sr
: title: sr

? ps
: title: ps

? cx
: title: cx

? vo
: title: vo

? mkv
: container: mkv

? avi
: container: avi

? dmd
: title: dmd

? the
: title: the

? dis
: title: dis

? cut
: title: cut

? stv
: title: stv

? des
: title: des

? dia
: title: dia

? and
: title: and

? cab
: title: cab

? sub
: title: sub

? mia
: title: mia

? rim
: title: rim

? las
: title: las

? une
: title: une

? par
: title: par

? srt
: container: srt

? ano
: title: ano

? toy
: title: toy

? job
: title: job

? gag
: title: gag

? reel
: title: reel

? www
: title: www

? for
: title: for

? ayu
: title: ayu

? csi
: title: csi

? ren
: title: ren

? moi
: title: moi

? sur
: title: sur

? fer
: title: fer

? fun
: title: fun

? two
: title: two

? big
: title: big

? psy
: title: psy

? air
: title: air

? brazil
: title: brazil

? jordan
: title: jordan

? bs
: title: bs

? kz
: title: kz

? gt
: title: gt

? im
: title: im

? pt
: language: pt

? scr
: title: scr

? sd
: title: sd

? hr
: other: High Resolution
@ -5,6 +5,9 @@
: country: US
  title: this is title

? This.is.us.title
: title: This is us title

? This.is.Us
: title: This is Us

? This.Is.Us
: options: --no-default-config
  title: This Is Us
@ -7,25 +7,57 @@
? Collector
? Collector Edition
? Edition Collector
: edition: Collector Edition
: edition: Collector

? Special Edition
? Edition Special
? -Special
: edition: Special Edition
: edition: Special

? Criterion Edition
? Edition Criterion
? CC
? -Criterion
: edition: Criterion Edition
: edition: Criterion

? Deluxe
? Deluxe Edition
? Edition Deluxe
: edition: Deluxe Edition
: edition: Deluxe

? Super Movie Alternate XViD
? Super Movie Alternative XViD
? Super Movie Alternate Cut XViD
? Super Movie Alternative Cut XViD
: edition: Alternative Cut

? ddc
: edition: Director's Definitive Cut

? IMAX
? IMAX Edition
: edition: IMAX

? ultimate edition
? -ultimate
: edition: Ultimate

? ultimate collector edition
? ultimate collector's edition
? ultimate collectors edition
? -collectors edition
? -ultimate edition
: edition: [Ultimate, Collector]

? ultimate collectors edition dc
: edition: [Ultimate, Collector, Director's Cut]

? fan edit
? fan edition
? fan collection
: edition: Fan

? ultimate fan edit
? ultimate fan edition
? ultimate fan collection
: edition: [Ultimate, Fan]
@ -32,8 +32,6 @@
? +serie Season 2 other
? +serie Saisons 2 other
? +serie Seasons 2 other
? +serie Serie 2 other
? +serie Series 2 other
? +serie Season Two other
? +serie Season II other
: season: 2

@ -156,7 +154,7 @@

? Show.Name.Season.1.3&5.HDTV.XviD-GoodGroup[SomeTrash]
? Show.Name.Season.1.3 and 5.HDTV.XviD-GoodGroup[SomeTrash]
: format: HDTV
: source: HDTV
  release_group: GoodGroup[SomeTrash]
  season:
  - 1

@ -164,12 +162,12 @@
  - 5
  title: Show Name
  type: episode
  video_codec: XviD
  video_codec: Xvid

? Show.Name.Season.1.2.3-5.HDTV.XviD-GoodGroup[SomeTrash]
? Show.Name.Season.1.2.3~5.HDTV.XviD-GoodGroup[SomeTrash]
? Show.Name.Season.1.2.3 to 5.HDTV.XviD-GoodGroup[SomeTrash]
: format: HDTV
: source: HDTV
  release_group: GoodGroup[SomeTrash]
  season:
  - 1

@ -179,18 +177,19 @@
  - 5
  title: Show Name
  type: episode
  video_codec: XviD
  video_codec: Xvid

? The.Get.Down.S01EP01.FRENCH.720p.WEBRIP.XVID-STR
: episode: 1
  format: WEBRip
  source: Web
  other: Rip
  language: fr
  release_group: STR
  screen_size: 720p
  season: 1
  title: The Get Down
  type: episode
  video_codec: XviD
  video_codec: Xvid

? My.Name.Is.Earl.S01E01-S01E21.SWE-SUB
: episode:

@ -269,4 +268,64 @@

? Episode71
? Episode 71
: episode: 71
: episode: 71

? S01D02.3-5-GROUP
: disc: [2, 3, 4, 5]

? S01D02&4-6&8
: disc: [2, 4, 5, 6, 8]

? Something.4x05-06
? Something - 4x05-06
? Something:4x05-06
? Something 4x05-06
? Something-4x05-06
: title: Something
  season: 4
  episode:
  - 5
  - 6

? Something.4x05-06
? Something - 4x05-06
? Something:4x05-06
? Something 4x05-06
? Something-4x05-06
: options: -T something
  title: something
  season: 4
  episode:
  - 5
  - 6

? Colony 23/S01E01.Some.title.mkv
: title: Colony 23
  season: 1
  episode: 1
  episode_title: Some title

? Show.Name.E02.2010.mkv
: options: -t episode
  title: Show Name
  year: 2010
  episode: 2

? Show.Name.E02.S2010.mkv
: options: -t episode
  title: Show Name
  year: 2010
  season: 2010
  episode: 2


? Show.Name.E02.2010.mkv
: title: Show Name
  year: 2010
  episode: 2

? Show.Name.E02.S2010.mkv
: title: Show Name
  year: 2010
  season: 2010
  episode: 2
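The -T and -t switches used in the options keys above are the standard guessit CLI flags for seeding an expected title and forcing the match type. A minimal sketch mirroring the fixtures (expected values taken from the entries above):

from guessit import guessit

print(guessit('Something-4x05-06', '-T something'))
# per the fixture: title 'something', season 4, episode [5, 6]

print(guessit('Show.Name.E02.2010.mkv', '-t episode'))
# per the fixture: title 'Show Name', year 2010, episode 2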
@ -1,138 +0,0 @@
# Multiple input strings having same expected results can be chained.
# Use - marker to check inputs that should not match results.
? +VHS
? +VHSRip
? +VHS-Rip
? +VhS_rip
? +VHS.RIP
? -VHSAnythingElse
? -SomeVHS stuff
? -VH
? -VHx
? -VHxRip
: format: VHS

? +Cam
? +CamRip
? +CaM Rip
? +Cam_Rip
? +cam.rip
: format: Cam

? +Telesync
? +TS
? +HD TS
? -Hd.Ts # ts file extension
? -HD.TS # ts file extension
? +Hd-Ts
: format: Telesync

? +Workprint
? +workPrint
? +WorkPrint
? +WP
? -Work Print
: format: Workprint

? +Telecine
? +teleCine
? +TC
? -Tele Cine
: format: Telecine

? +PPV
? +ppv-rip
: format: PPV

? -TV
? +SDTV
? +SDTVRIP
? +Rip sd tv
? +TvRip
? +Rip TV
: format: TV

? +DVB
? +DVB-Rip
? +DvBRiP
? +pdTV
? +Pd Tv
: format: DVB

? +DVD
? +DVD-RIP
? +video ts
? +DVDR
? +DVD 9
? +dvd 5
? -dvd ts
: format: DVD
  -format: ts

? +HDTV
? +tv rip hd
? +HDtv Rip
? +HdRip
: format: HDTV

? +VOD
? +VodRip
? +vod rip
: format: VOD

? +webrip
? +Web Rip
? +webdlrip
? +web dl rip
? +webcap
? +web cap
: format: WEBRip

? +webdl
? +Web DL
? +webHD
? +WEB hd
? +web
: format: WEB-DL

? +HDDVD
? +hd dvd
? +hdDvdRip
: format: HD-DVD

? +BluRay
? +BluRay rip
? +BD
? +BR
? +BDRip
? +BR rip
? +BD5
? +BD9
? +BD25
? +bd50
: format: BluRay

? XVID.NTSC.DVDR.nfo
: format: DVD

? AHDTV
: format: AHDTV

? dsr
? dsrip
? ds rip
? dsrrip
? dsr rip
? satrip
? sat rip
? dth
? dthrip
? dth rip
: format: SATRip

? HDTC
: format: HDTC

? UHDTV
? UHDRip
: format: UHDTV
@ -36,4 +36,12 @@
? +ENG.-.SubSV
? +ENG.-.SVSUB
: language: English
  subtitle_language: Swedish
  subtitle_language: Swedish

? The English Patient (1996)
: title: The English Patient
  -language: english

? French.Kiss.1995.1080p
: title: French Kiss
  -language: french
@ -12,38 +12,35 @@
? +AudioFixed
? +Audio Fix
? +Audio Fixed
: other: AudioFix
: other: Audio Fixed

? +SyncFix
? +SyncFixed
? +Sync Fix
? +Sync Fixed
: other: SyncFix
: other: Sync Fixed

? +DualAudio
? +Dual Audio
: other: DualAudio
: other: Dual Audio

? +ws
? +WideScreen
? +Wide Screen
: other: WideScreen
: other: Widescreen

# Fix and Real must be surround by others properties to be matched.
? DVD.Real.XViD
# Fix must be surround by others properties to be matched.
? DVD.fix.XViD
? -DVD.Real
? -DVD.Fix
? -Real.XViD
? -Fix.XViD
: other: Proper
  proper_count: 1
: other: Fix
  -proper_count: 1

? -DVD.BlablaBla.Fix.Blablabla.XVID
? -DVD.BlablaBla.Fix.XVID
? -DVD.Fix.Blablabla.XVID
: other: Proper
  proper_count: 1
: other: Fix
  -proper_count: 1


? DVD.Real.PROPER.REPACK

@ -51,25 +48,27 @@
  proper_count: 3


? Proper
? Proper.720p
? +Repack
? +Rerip
: other: Proper
  proper_count: 1

? XViD.Fansub
: other: Fansub
: other: Fan Subtitled

? XViD.Fastsub
: other: Fastsub
: other: Fast Subtitled

? +Season Complete
? -Complete
: other: Complete

? R5
: other: Region 5

? RC
: other: R5
: other: Region C

? PreAir
? Pre Air

@ -81,7 +80,7 @@
? Remux
: other: Remux

? 3D
? 3D.2019
: other: 3D

? HD

@ -90,28 +89,23 @@
? FHD
? FullHD
? Full HD
: other: FullHD
: other: Full HD

? UHD
? Ultra
? UltraHD
? Ultra HD
: other: UltraHD
: other: Ultra HD

? mHD # ??
: other: mHD

? HDLight
: other: HDLight
: other: Micro HD

? HQ
: other: HQ

? ddc
: other: DDC
: other: High Quality

? hr
: other: HR
: other: High Resolution

? PAL
: other: PAL

@ -122,15 +116,14 @@
? NTSC
: other: NTSC

? CC
: other: CC
? LDTV
: other: Low Definition

? LD
? LDTV
: other: LD
: other: Line Dubbed

? MD
: other: MD
: other: Mic Dubbed

? -The complete movie
: other: Complete

@ -139,16 +132,38 @@
: title: The complete movie

? +AC3-HQ
: audio_profile: HQ
: audio_profile: High Quality

? Other-HQ
: other: HQ
: other: High Quality

? reenc
? re-enc
? re-encoded
? reencoded
: other: ReEncoded
: other: Reencoded

? CONVERT XViD
: other: Converted
: other: Converted

? +HDRIP # it's a Rip from non specified HD source
: other: [HD, Rip]

? SDR
: other: Standard Dynamic Range

? HDR
? HDR10
? -HDR100
: other: HDR10

? BT2020
? BT.2020
? -BT.20200
? -BT.2021
: other: BT.2020

? Upscaled
? Upscale
: other: Upscaled
@ -42,30 +42,30 @@

? Show.Name.x264-byEMP
: title: Show Name
  video_codec: h264
  video_codec: H.264
  release_group: byEMP

? Show.Name.x264-NovaRip
: title: Show Name
  video_codec: h264
  video_codec: H.264
  release_group: NovaRip

? Show.Name.x264-PARTiCLE
: title: Show Name
  video_codec: h264
  video_codec: H.264
  release_group: PARTiCLE

? Show.Name.x264-POURMOi
: title: Show Name
  video_codec: h264
  video_codec: H.264
  release_group: POURMOi

? Show.Name.x264-RipPourBox
: title: Show Name
  video_codec: h264
  video_codec: H.264
  release_group: RipPourBox

? Show.Name.x264-RiPRG
: title: Show Name
  video_codec: h264
  video_codec: H.264
  release_group: RiPRG
@ -2,68 +2,279 @@
# Use - marker to check inputs that should not match results.
? +360p
? +360px
? +360i
? "+360"
? -360
? +500x360
? -250x360
: screen_size: 360p

? +640x360
? -640x360i
? -684x360i
: screen_size: 360p
  aspect_ratio: 1.778

? +360i
: screen_size: 360i

? +480x360i
? -480x360p
? -450x360
: screen_size: 360i
  aspect_ratio: 1.333

? +368p
? +368px
? +368i
? "+368"
? -368i
? -368
? +500x368
: screen_size: 368p

? -490x368
? -700x368
: screen_size: 368p

? +492x368p
: screen_size:
  aspect_ratio: 1.337

? +654x368
: screen_size: 368p
  aspect_ratio: 1.777

? +698x368
: screen_size: 368p
  aspect_ratio: 1.897

? +368i
: -screen_size: 368i

? +480p
? +480px
? +480i
? "+480"
? +500x480
? -480i
? -480
? -500x480
? -638x480
? -920x480
: screen_size: 480p

? +640x480
: screen_size: 480p
  aspect_ratio: 1.333

? +852x480
: screen_size: 480p
  aspect_ratio: 1.775

? +910x480
: screen_size: 480p
  aspect_ratio: 1.896

? +500x480
? +500 x 480
? +500 * 480
? +500x480p
? +500X480i
: screen_size: 500x480
  aspect_ratio: 1.042

? +480i
? +852x480i
: screen_size: 480i

? +576p
? +576px
? +576i
? "+576"
? +500x576
? -576i
? -576
? -500x576
? -766x576
? -1094x576
: screen_size: 576p

? +768x576
: screen_size: 576p
  aspect_ratio: 1.333

? +1024x576
: screen_size: 576p
  aspect_ratio: 1.778

? +1092x576
: screen_size: 576p
  aspect_ratio: 1.896

? +500x576
: screen_size: 500x576
  aspect_ratio: 0.868

? +576i
: screen_size: 576i

? +720p
? +720px
? -720i
? 720hd
? 720pHD
? +720i
? "+720"
? +500x720
? -720
? -500x720
? -950x720
? -1368x720
: screen_size: 720p

? +960x720
: screen_size: 720p
  aspect_ratio: 1.333

? +1280x720
: screen_size: 720p
  aspect_ratio: 1.778

? +1366x720
: screen_size: 720p
  aspect_ratio: 1.897

? +500x720
: screen_size: 500x720
  aspect_ratio: 0.694

? +900p
? +900px
? +900i
? "+900"
? +500x900
? -900i
? -900
? -500x900
? -1198x900
? -1710x900
: screen_size: 900p

? +1200x900
: screen_size: 900p
  aspect_ratio: 1.333

? +1600x900
: screen_size: 900p
  aspect_ratio: 1.778

? +1708x900
: screen_size: 900p
  aspect_ratio: 1.898

? +500x900
? +500x900p
? +500x900i
: screen_size: 500x900
  aspect_ratio: 0.556

? +900i
: screen_size: 900i

? +1080p
? +1080px
? +1080hd
? +1080pHD
? -1080i
? "+1080"
? +500x1080
? -1080
? -500x1080
? -1438x1080
? -2050x1080
: screen_size: 1080p

? +1440x1080
: screen_size: 1080p
  aspect_ratio: 1.333

? +1920x1080
: screen_size: 1080p
  aspect_ratio: 1.778

? +2048x1080
: screen_size: 1080p
  aspect_ratio: 1.896

? +1080i
? -1080p
: screen_size: 1080i

? 1440p
: screen_size: 1440p

? +500x1080
: screen_size: 500x1080
  aspect_ratio: 0.463

? +2160p
? +2160px
? +2160i
? "+2160"
? -2160i
? -2160
? +4096x2160
: screen_size: 4K
? +4k
? -2878x2160
? -4100x2160
: screen_size: 2160p

? +2880x2160
: screen_size: 2160p
  aspect_ratio: 1.333

? +3840x2160
: screen_size: 2160p
  aspect_ratio: 1.778

? +4098x2160
: screen_size: 2160p
  aspect_ratio: 1.897

? +500x2160
: screen_size: 500x2160
  aspect_ratio: 0.231

? +4320p
? +4320px
? -4320i
? -4320
? -5758x2160
? -8198x2160
: screen_size: 4320p

? +5760x4320
: screen_size: 4320p
  aspect_ratio: 1.333

? +7680x4320
: screen_size: 4320p
  aspect_ratio: 1.778

? +8196x4320
: screen_size: 4320p
  aspect_ratio: 1.897

? +500x4320
: screen_size: 500x4320
  aspect_ratio: 0.116

? Test.File.720hd.bluray
? Test.File.720p24
? Test.File.720p30
? Test.File.720p50
? Test.File.720p60
? Test.File.720p120
: screen_size: 720p

? Test.File.400p
: options:
    advanced_config:
      screen_size:
        progressive: ["400"]
  screen_size: 400p

? Test.File2.400p
: options:
    advanced_config:
      screen_size:
        progressive: ["400"]
  screen_size: 400p

? Test.File.720p
: options:
    advanced_config:
      screen_size:
        progressive: ["400"]
  screen_size: 720p
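The aspect_ratio values expected throughout this file are simply width divided by height, rounded to three decimals. A quick check against a few of the fixtures above:

def aspect_ratio(width, height):
    """Width-to-height ratio, rounded the way the fixtures expect."""
    return round(width / height, 3)

assert aspect_ratio(1920, 1080) == 1.778
assert aspect_ratio(640, 360) == 1.778
assert aspect_ratio(500, 480) == 1.042
assert aspect_ratio(8196, 4320) == 1.897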
323 libs/guessit/test/rules/source.yml Normal file

@ -0,0 +1,323 @@
# Multiple input strings having same expected results can be chained.
# Use - marker to check inputs that should not match results.
? +VHS
? -VHSAnythingElse
? -SomeVHS stuff
? -VH
? -VHx
: source: VHS
  -other: Rip

? +VHSRip
? +VHS-Rip
? +VhS_rip
? +VHS.RIP
? -VHS
? -VHxRip
: source: VHS
  other: Rip

? +Cam
: source: Camera
  -other: Rip

? +CamRip
? +CaM Rip
? +Cam_Rip
? +cam.rip
? -Cam
: source: Camera
  other: Rip

? +HDCam
? +HD-Cam
: source: HD Camera
  -other: Rip

? +HDCamRip
? +HD-Cam.rip
? -HDCam
? -HD-Cam
: source: HD Camera
  other: Rip

? +Telesync
? +TS
: source: Telesync
  -other: Rip

? +TelesyncRip
? +TSRip
? -Telesync
? -TS
: source: Telesync
  other: Rip

? +HD TS
? -Hd.Ts # ts file extension
? -HD.TS # ts file extension
? +Hd-Ts
: source: HD Telesync
  -other: Rip

? +HD TS Rip
? +Hd-Ts-Rip
? -HD TS
? -Hd-Ts
: source: HD Telesync
  other: Rip

? +Workprint
? +workPrint
? +WorkPrint
? +WP
? -Work Print
: source: Workprint
  -other: Rip

? +Telecine
? +teleCine
? +TC
? -Tele Cine
: source: Telecine
  -other: Rip

? +Telecine Rip
? +teleCine-Rip
? +TC-Rip
? -Telecine
? -TC
: source: Telecine
  other: Rip

? +HD-TELECINE
? +HDTC
: source: HD Telecine
  -other: Rip

? +HD-TCRip
? +HD TELECINE RIP
? -HD-TELECINE
? -HDTC
: source: HD Telecine
  other: Rip

? +PPV
: source: Pay-per-view
  -other: Rip

? +ppv-rip
? -PPV
: source: Pay-per-view
  other: Rip

? -TV
? +SDTV
? +TV-Dub
: source: TV
  -other: Rip

? +SDTVRIP
? +Rip sd tv
? +TvRip
? +Rip TV
? -TV
? -SDTV
: source: TV
  other: Rip

? +DVB
? +pdTV
? +Pd Tv
: source: Digital TV
  -other: Rip

? +DVB-Rip
? +DvBRiP
? +pdtvRiP
? +pd tv RiP
? -DVB
? -pdTV
? -Pd Tv
: source: Digital TV
  other: Rip

? +DVD
? +video ts
? +DVDR
? +DVD 9
? +dvd 5
? -dvd ts
: source: DVD
  -source: Telesync
  -other: Rip

? +DVD-RIP
? -video ts
? -DVD
? -DVDR
? -DVD 9
? -dvd 5
: source: DVD
  other: Rip

? +HDTV
: source: HDTV
  -other: Rip

? +tv rip hd
? +HDtv Rip
? -HdRip # it's a Rip from non specified HD source
? -HDTV
: source: HDTV
  other: Rip

? +VOD
: source: Video on Demand
  -other: Rip

? +VodRip
? +vod rip
? -VOD
: source: Video on Demand
  other: Rip

? +webrip
? +Web Rip
? +webdlrip
? +web dl rip
? +webcap
? +web cap
? +webcaprip
? +web cap rip
: source: Web
  other: Rip

? +webdl
? +Web DL
? +webHD
? +WEB hd
? +web
: source: Web
  -other: Rip

? +HDDVD
? +hd dvd
: source: HD-DVD
  -other: Rip

? +hdDvdRip
? -HDDVD
? -hd dvd
: source: HD-DVD
  other: Rip

? +BluRay
? +BD
? +BD5
? +BD9
? +BD25
? +bd50
: source: Blu-ray
  -other: Rip

? +BR-Scr
? +BR.Screener
: source: Blu-ray
  other: [Reencoded, Screener]
  -language: pt-BR

? +BR-Rip
? +BRRip
: source: Blu-ray
  other: [Reencoded, Rip]
  -language: pt-BR

? +BluRay rip
? +BDRip
? -BluRay
? -BD
? -BR
? -BR rip
? -BD5
? -BD9
? -BD25
? -bd50
: source: Blu-ray
  other: Rip

? XVID.NTSC.DVDR.nfo
: source: DVD
  -other: Rip

? +AHDTV
: source: Analog HDTV
  -other: Rip

? +dsr
? +dth
: source: Satellite
  -other: Rip

? +dsrip
? +ds rip
? +dsrrip
? +dsr rip
? +satrip
? +sat rip
? +dthrip
? +dth rip
? -dsr
? -dth
: source: Satellite
  other: Rip

? +UHDTV
: source: Ultra HDTV
  -other: Rip

? +UHDRip
? +UHDTV Rip
? -UHDTV
: source: Ultra HDTV
  other: Rip

? UHD Bluray
? UHD 2160p Bluray
? UHD 8bit Bluray
? UHD HQ 8bit Bluray
? Ultra Bluray
? Ultra HD Bluray
? Bluray ULTRA
? Bluray Ultra HD
? Bluray UHD
? 4K Bluray
? 2160p Bluray
? UHD 10bit HDR Bluray
? UHD HDR10 Bluray
? -HD Bluray
? -AMERICAN ULTRA (2015) 1080p Bluray
? -American.Ultra.2015.BRRip
? -BRRip XviD AC3-ULTRAS
? -UHD Proper Bluray
: source: Ultra HD Blu-ray

? UHD.BRRip
? UHD.2160p.BRRip
? BRRip.2160p.UHD
? BRRip.[4K-2160p-UHD]
: source: Ultra HD Blu-ray
  other: [Reencoded, Rip]

? UHD.2160p.BDRip
? BDRip.[4K-2160p-UHD]
: source: Ultra HD Blu-ray
  other: Rip

? DM
: source: Digital Master

? DMRIP
? DM-RIP
: source: Digital Master
  other: Rip
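This new fixture file documents the headline rename of the upgrade: the old format property becomes source, with rip variants split out into other: Rip. A minimal sketch of the resulting API output (release name invented for illustration):

from guessit import guessit

info = guessit('Movie.Title.2018.BDRip.x264-GRP.mkv')
assert info['source'] == 'Blu-ray'  # GuessIt 2 reported format: 'BluRay'
assert info['other'] == 'Rip'       # rip-ness is now a separate property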
@ -30,3 +30,14 @@
? Some.Other title/Some other title.mkv
: title: Some Other title

? This T.I.T.L.E. has dots
? This.T.I.T.L.E..has.dots
: title: This T.I.T.L.E has dots

? This.T.I.T.L.E..has.dots.S01E02.This E.P.T.I.T.L.E.has.dots
: title: This T.I.T.L.E has dots
  season: 1
  episode: 2
  episode_title: This E.P.T.I.T.L.E has dots
  type: episode
@ -6,15 +6,19 @@
? Rv30
? rv40
? -xrv40
: video_codec: Real
: video_codec: RealVideo

? mpeg2
? MPEG2
? MPEG-2
? mpg2
? H262
? H.262
? x262
? -mpeg
? -mpeg 2 # Not sure if we should ignore this one ...
? -xmpeg2
? -mpeg2x
: video_codec: Mpeg2
: video_codec: MPEG-2

? DivX
? -div X

@ -26,19 +30,25 @@
? XviD
? xvid
? -x vid
: video_codec: XviD
: video_codec: Xvid

? h263
? x263
? h.263
: video_codec: H.263

? h264
? x264
? h.264
? x.264
? mpeg4-AVC
? AVC
? AVCHD
? -MPEG-4
? -mpeg4
? -mpeg
? -h 265
? -x265
: video_codec: h264
: video_codec: H.264

? h265
? x265

@ -47,13 +57,42 @@
? hevc
? -h 264
? -x264
: video_codec: h265
: video_codec: H.265

? hevc10
? HEVC-YUV420P10
: video_codec: h265
  video_profile: 10bit
: video_codec: H.265
  color_depth: 10-bit

? h265-HP
: video_codec: h265
  video_profile: HP
: video_codec: H.265
  video_profile: High

? H.264-SC
: video_codec: H.264
  video_profile: Scalable Video Coding

? mpeg4-AVC
: video_codec: H.264
  video_profile: Advanced Video Codec High Definition

? AVCHD-SC
? H.264-AVCHD-SC
: video_codec: H.264
  video_profile:
  - Scalable Video Coding
  - Advanced Video Codec High Definition

? VC1
? VC-1
: video_codec: VC-1

? VP7
: video_codec: VP7

? VP8
? VP80
: video_codec: VP8

? VP9
: video_codec: VP9
1934 libs/guessit/test/streaming_services.yaml Normal file
File diff suppressed because it is too large
21 libs/guessit/test/suggested.json Normal file

@ -0,0 +1,21 @@
{
  "titles": [
    "13 Reasons Why",
    "Star Wars: Episode VII - The Force Awakens",
    "3%",
    "The 100",
    "3 Percent",
    "This is Us",
    "Open Season 2",
    "Game of Thrones",
    "The X-Files",
    "11.22.63"
  ],
  "suggested": [
    "13 Reasons Why",
    "Star Wars: Episode VII - The Force Awakens",
    "The 100",
    "Open Season 2",
    "11.22.63"
  ]
}
@ -1,13 +1,14 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name, pointless-string-statement

import json
import os
import sys

import pytest
import six

from ..api import guessit, properties, GuessitException
from ..api import guessit, properties, suggested_expected, GuessitException

__location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))


@ -27,6 +28,18 @@ def test_forced_binary():
    assert ret and 'title' in ret and isinstance(ret['title'], six.binary_type)


@pytest.mark.skipif(sys.version_info < (3, 4), reason="Path is not available")
def test_pathlike_object():
    try:
        from pathlib import Path

        path = Path('Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv')
        ret = guessit(path)
        assert ret and 'title' in ret
    except ImportError:  # pragma: no-cover
        pass


def test_unicode_japanese():
    ret = guessit('[阿维达].Avida.2006.FRENCH.DVDRiP.XViD-PROD.avi')
    assert ret and 'title' in ret

@ -61,3 +74,10 @@ def test_exception():
    assert "An internal error has occured in guessit" in str(excinfo.value)
    assert "Guessit Exception Report" in str(excinfo.value)
    assert "Please report at https://github.com/guessit-io/guessit/issues" in str(excinfo.value)


def test_suggested_expected():
    with open(os.path.join(__location__, 'suggested.json'), 'r') as f:
        content = json.load(f)
    actual = suggested_expected(content['titles'])
    assert actual == content['suggested']
@ -53,6 +53,14 @@ if six.PY2:
    """


def test_ensure_standard_string_class():
    class CustomStr(str):
        pass

    ret = guessit(CustomStr('1080p'), options={'advanced': True})
    assert ret and 'screen_size' in ret and not isinstance(ret['screen_size'].input_string, CustomStr)


def test_properties():
    props = properties()
    assert 'video_codec' in props.keys()
@ -5,7 +5,7 @@ import os

import pytest

from ..options import get_config_file_locations, merge_configurations, load_config_file, ConfigurationException, \
from ..options import get_options_file_locations, merge_options, load_config_file, ConfigurationException, \
    load_config

__location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))

@ -15,7 +15,7 @@ def test_config_locations():
    homedir = '/root'
    cwd = '/root/cwd'

    locations = get_config_file_locations(homedir, cwd, True)
    locations = get_options_file_locations(homedir, cwd, True)
    assert len(locations) == 9

    assert '/root/.guessit/options.json' in locations

@ -34,12 +34,12 @@ def test_merge_configurations():
    c2 = {'param1': False, 'param2': True, 'param3': False}
    c3 = {'param1': False, 'param2': True, 'param3': False}

    merged = merge_configurations(c1, c2, c3)
    merged = merge_options(c1, c2, c3)
    assert not merged['param1']
    assert merged['param2']
    assert not merged['param3']

    merged = merge_configurations(c3, c2, c1)
    merged = merge_options(c3, c2, c1)
    assert merged['param1']
    assert merged['param2']
    assert not merged['param3']

@ -50,28 +50,49 @@ def test_merge_configurations_lists():
    c2 = {'param1': [2], 'param2': True, 'param3': False}
    c3 = {'param1': [3], 'param2': True, 'param3': False}

    merged = merge_configurations(c1, c2, c3)
    merged = merge_options(c1, c2, c3)
    assert merged['param1'] == [1, 2, 3]
    assert merged['param2']
    assert not merged['param3']

    merged = merge_configurations(c3, c2, c1)
    merged = merge_options(c3, c2, c1)
    assert merged['param1'] == [3, 2, 1]
    assert merged['param2']
    assert not merged['param3']


def test_merge_configurations_deep():
    c1 = {'param1': [1], 'param2': {'d1': [1]}, 'param3': False}
    c2 = {'param1': [2], 'param2': {'d1': [2]}, 'param3': False}
    c3 = {'param1': [3], 'param2': {'d3': [3]}, 'param3': False}

    merged = merge_options(c1, c2, c3)
    assert merged['param1'] == [1, 2, 3]
    assert merged['param2']['d1'] == [1, 2]
    assert merged['param2']['d3'] == [3]
    assert 'd2' not in merged['param2']
    assert not merged['param3']

    merged = merge_options(c3, c2, c1)
    assert merged['param1'] == [3, 2, 1]
    assert merged['param2']
    assert merged['param2']['d1'] == [2, 1]
    assert 'd2' not in merged['param2']
    assert merged['param2']['d3'] == [3]
    assert not merged['param3']


def test_merge_configurations_pristine_all():
    c1 = {'param1': [1], 'param2': True, 'param3': False}
    c2 = {'param1': [2], 'param2': True, 'param3': False, 'pristine': True}
    c3 = {'param1': [3], 'param2': True, 'param3': False}

    merged = merge_configurations(c1, c2, c3)
    merged = merge_options(c1, c2, c3)
    assert merged['param1'] == [2, 3]
    assert merged['param2']
    assert not merged['param3']

    merged = merge_configurations(c3, c2, c1)
    merged = merge_options(c3, c2, c1)
    assert merged['param1'] == [2, 1]
    assert merged['param2']
    assert not merged['param3']

@ -82,7 +103,18 @@ def test_merge_configurations_pristine_properties():
    c2 = {'param1': [2], 'param2': True, 'param3': False, 'pristine': ['param2', 'param3']}
    c3 = {'param1': [3], 'param2': True, 'param3': False}

    merged = merge_configurations(c1, c2, c3)
    merged = merge_options(c1, c2, c3)
    assert merged['param1'] == [1, 2, 3]
    assert merged['param2']
    assert not merged['param3']


def test_merge_configurations_pristine_properties_deep():
    c1 = {'param1': [1], 'param2': {'d1': False}, 'param3': True}
    c2 = {'param1': [2], 'param2': {'d1': True}, 'param3': False, 'pristine': ['param2', 'param3']}
    c3 = {'param1': [3], 'param2': {'d1': True}, 'param3': False}

    merged = merge_options(c1, c2, c3)
    assert merged['param1'] == [1, 2, 3]
    assert merged['param2']
    assert not merged['param3']

@ -93,7 +125,7 @@ def test_merge_configurations_pristine_properties2():
    c2 = {'param1': [2], 'param2': True, 'param3': False, 'pristine': ['param1', 'param2', 'param3']}
    c3 = {'param1': [3], 'param2': True, 'param3': False}

    merged = merge_configurations(c1, c2, c3)
    merged = merge_options(c1, c2, c3)
    assert merged['param1'] == [2, 3]
    assert merged['param2']
    assert not merged['param3']

@ -119,24 +151,25 @@ def test_load_config_file():


def test_load_config():
    config = load_config({'no_embedded_config': True, 'param1': 'test',
    config = load_config({'no_default_config': True, 'param1': 'test',
                          'config': [os.path.join(__location__, 'config', 'test.yml')]})

    assert config['param1'] == 'test'
    assert not config.get('param1')

    assert config.get('advanced_config')  # advanced_config is still loaded from default
    assert config['expected_title'] == ['The 100', 'OSS 117']
    assert config['yaml'] is True

    config = load_config({'no_embedded_config': True, 'param1': 'test'})
    config = load_config({'no_default_config': True, 'param1': 'test'})

    assert config['param1'] == 'test'
    assert not config.get('param1')

    assert 'expected_title' not in config
    assert 'yaml' not in config

    config = load_config({'no_embedded_config': True, 'param1': 'test', 'config': ['false']})
    config = load_config({'no_default_config': True, 'param1': 'test', 'config': ['false']})

    assert config['param1'] == 'test'
    assert not config.get('param1')

    assert 'expected_title' not in config
    assert 'yaml' not in config
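The renamed merge_options helper keeps the semantics these tests pin down: scalars are overridden by later configurations, lists are concatenated, nested dicts are merged recursively, and a pristine marker makes later values replace rather than extend earlier ones. A minimal usage sketch built from those assertions:

from guessit.options import merge_options  # renamed from merge_configurations

c1 = {'param1': [1], 'param2': {'d1': [1]}}
c2 = {'param1': [2], 'param2': {'d1': [2]}, 'pristine': ['param1']}

merged = merge_options(c1, c2)
assert merged['param1'] == [2]           # 'pristine' resets the list
assert merged['param2']['d1'] == [1, 2]  # nested dicts merge recursively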
@ -2,36 +2,24 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name
|
||||
import logging
|
||||
|
||||
import os
|
||||
# io.open supports encoding= in python 2.7
|
||||
from io import open # pylint: disable=redefined-builtin
|
||||
import os
|
||||
import yaml
|
||||
|
||||
import six
|
||||
|
||||
import babelfish
|
||||
import pytest
|
||||
|
||||
import six # pylint:disable=wrong-import-order
|
||||
import yaml # pylint:disable=wrong-import-order
|
||||
from rebulk.remodule import re
|
||||
from rebulk.utils import is_iterable
|
||||
|
||||
from ..options import parse_options, load_config
|
||||
from ..yamlutils import OrderedDictYAMLLoader
|
||||
from .. import guessit
|
||||
|
||||
from ..options import parse_options
|
||||
from ..yamlutils import OrderedDictYAMLLoader
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
__location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
|
||||
|
||||
filename_predicate = None
|
||||
string_predicate = None
|
||||
|
||||
|
||||
# filename_predicate = lambda filename: 'episode_title' in filename
|
||||
# string_predicate = lambda string: '-DVD.BlablaBla.Fix.Blablabla.XVID' in string
|
||||
|
||||
|
||||
class EntryResult(object):
|
||||
def __init__(self, string, negates=False):
|
||||
|
@ -64,10 +52,10 @@ class EntryResult(object):
|
|||
def __repr__(self):
|
||||
if self.ok:
|
||||
return self.string + ': OK!'
|
||||
elif self.warning:
|
||||
if self.warning:
|
||||
return '%s%s: WARNING! (valid=%i, extra=%i)' % ('-' if self.negates else '', self.string, len(self.valid),
|
||||
len(self.extra))
|
||||
elif self.error:
|
||||
if self.error:
|
||||
return '%s%s: ERROR! (valid=%i, missing=%i, different=%i, extra=%i, others=%i)' % \
|
||||
('-' if self.negates else '', self.string, len(self.valid), len(self.missing), len(self.different),
|
||||
len(self.extra), len(self.others))
|
||||
|
@ -136,9 +124,51 @@ class TestYml(object):
|
|||
Use $ marker to check inputs that should not match results.
|
||||
"""
|
||||
|
||||
options_re = re.compile(r'^([ \+-]+)(.*)')
|
||||
options_re = re.compile(r'^([ +-]+)(.*)')
|
||||
|
||||
files, ids = files_and_ids(filename_predicate)
|
||||
def _get_unique_id(self, collection, base_id):
|
||||
ret = base_id
|
||||
i = 2
|
||||
while ret in collection:
|
||||
suffix = "-" + str(i)
|
||||
ret = base_id + suffix
|
||||
i += 1
|
||||
return ret
|
||||
|
||||
def pytest_generate_tests(self, metafunc):
|
||||
if 'yml_test_case' in metafunc.fixturenames:
|
||||
entries = []
|
||||
entry_ids = []
|
||||
entry_set = set()
|
||||
|
||||
for filename, _ in zip(*files_and_ids()):
|
||||
with open(os.path.join(__location__, filename), 'r', encoding='utf-8') as infile:
|
||||
                    data = yaml.load(infile, OrderedDictYAMLLoader)

                last_expected = None
                for string, expected in reversed(list(data.items())):
                    if expected is None:
                        data[string] = last_expected
                    else:
                        last_expected = expected

                default = None
                try:
                    default = data['__default__']
                    del data['__default__']
                except KeyError:
                    pass

                for string, expected in data.items():
                    TestYml.set_default(expected, default)
                    string = TestYml.fix_encoding(string, expected)

                    entries.append((filename, string, expected))
                    unique_id = self._get_unique_id(entry_set, '[' + filename + '] ' + str(string))
                    entry_set.add(unique_id)
                    entry_ids.append(unique_id)

        metafunc.parametrize('yml_test_case', entries, ids=entry_ids)

    @staticmethod
    def set_default(expected, default):

@@ -147,34 +177,8 @@ class TestYml(object):
                if k not in expected:
                    expected[k] = v

    @pytest.mark.parametrize('filename', files, ids=ids)
    def test(self, filename, caplog):
        caplog.setLevel(logging.INFO)
        with open(os.path.join(__location__, filename), 'r', encoding='utf-8') as infile:
            data = yaml.load(infile, OrderedDictYAMLLoader)
        entries = Results()

        last_expected = None
        for string, expected in reversed(list(data.items())):
            if expected is None:
                data[string] = last_expected
            else:
                last_expected = expected

        default = None
        try:
            default = data['__default__']
            del data['__default__']
        except KeyError:
            pass

        for string, expected in data.items():
            TestYml.set_default(expected, default)
            entry = self.check_data(filename, string, expected)
            entries.append(entry)
        entries.assert_ok()

    def check_data(self, filename, string, expected):
    @classmethod
    def fix_encoding(cls, string, expected):
        if six.PY2:
            if isinstance(string, six.text_type):
                string = string.encode('utf-8')

@@ -187,16 +191,23 @@ class TestYml(object):
                expected[k] = v
        if not isinstance(string, str):
            string = str(string)
        if not string_predicate or string_predicate(string):  # pylint: disable=not-callable
            entry = self.check(string, expected)
            if entry.ok:
                logger.debug('[' + filename + '] ' + str(entry))
            elif entry.warning:
                logger.warning('[' + filename + '] ' + str(entry))
            elif entry.error:
                logger.error('[' + filename + '] ' + str(entry))
                for line in entry.details:
                    logger.error('[' + filename + '] ' + ' ' * 4 + line)
        return string

    def test_entry(self, yml_test_case):
        filename, string, expected = yml_test_case
        result = self.check_data(filename, string, expected)
        assert not result.error

    def check_data(self, filename, string, expected):
        entry = self.check(string, expected)
        if entry.ok:
            logger.debug('[%s] %s', filename, entry)
        elif entry.warning:
            logger.warning('[%s] %s', filename, entry)
        elif entry.error:
            logger.error('[%s] %s', filename, entry)
            for line in entry.details:
                logger.error('[%s] %s', filename, ' ' * 4 + line)
        return entry

    def check(self, string, expected):

@@ -207,12 +218,10 @@ class TestYml(object):
            options = {}
        if not isinstance(options, dict):
            options = parse_options(options)
        options['config'] = False
        options = load_config(options)
        try:
            result = guessit(string, options)
        except Exception as exc:
            logger.error('[' + string + '] Exception: ' + str(exc))
            logger.error('[%s] Exception: %s', string, exc)
            raise exc

        entry = EntryResult(string, negates)

@@ -258,10 +267,10 @@ class TestYml(object):
            return False
        if isinstance(next(iter(values)), babelfish.Language):
            # pylint: disable=no-member
            expecteds = set([babelfish.Language.fromguessit(expected) for expected in expecteds])
            expecteds = {babelfish.Language.fromguessit(expected) for expected in expecteds}
        elif isinstance(next(iter(values)), babelfish.Country):
            # pylint: disable=no-member
            expecteds = set([babelfish.Country.fromguessit(expected) for expected in expecteds])
            expecteds = {babelfish.Country.fromguessit(expected) for expected in expecteds}
        return values == expecteds

    def check_expected(self, result, expected, entry):

@@ -274,10 +283,10 @@ class TestYml(object):
                if negates_key:
                    entry.valid.append((expected_key, expected_value))
                else:
                    entry.different.append((expected_key, expected_value, result[expected_key]))
                    entry.different.append((expected_key, expected_value, result[result_key]))
            else:
                if negates_key:
                    entry.different.append((expected_key, expected_value, result[expected_key]))
                    entry.different.append((expected_key, expected_value, result[result_key]))
                else:
                    entry.valid.append((expected_key, expected_value))
        elif not negates_key:
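The hunks above replace the one-test-per-file loop with per-entry parametrization through pytest's `pytest_generate_tests` hook, so each yml entry reports as its own test case. A minimal standalone sketch of that pattern, with a hypothetical `load_entries` helper standing in for the yml loading above:

    # conftest.py sketch; load_entries() is hypothetical, not part of this diff.
    def load_entries():
        yield ('rules/episodes.yml', 'Show.S01E02.mkv', {'season': 1, 'episode': 2})

    def pytest_generate_tests(metafunc):
        if 'yml_test_case' in metafunc.fixturenames:
            entries = list(load_entries())
            ids = ['[%s] %s' % (filename, string) for filename, string, _ in entries]
            # Each yml entry becomes an individual test case instead of one
            # monolithic test per file.
            metafunc.parametrize('yml_test_case', entries, ids=ids)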
File diff suppressed because it is too large
@@ -3,23 +3,26 @@
"""
Options
"""

try:
    from collections import OrderedDict
except ImportError:  # pragma: no-cover
    from ordereddict import OrderedDict  # pylint:disable=import-error
import babelfish

import yaml
import yaml  # pylint:disable=wrong-import-order

from .rules.common.quantity import BitRate, FrameRate, Size


class OrderedDictYAMLLoader(yaml.Loader):
class OrderedDictYAMLLoader(yaml.SafeLoader):
    """
    A YAML loader that loads mappings into ordered dictionaries.
    From https://gist.github.com/enaeseth/844388
    """

    def __init__(self, *args, **kwargs):
        yaml.Loader.__init__(self, *args, **kwargs)
        yaml.SafeLoader.__init__(self, *args, **kwargs)

        self.add_constructor(u'tag:yaml.org,2002:map', type(self).construct_yaml_map)
        self.add_constructor(u'tag:yaml.org,2002:omap', type(self).construct_yaml_map)

@@ -55,17 +58,24 @@ class CustomDumper(yaml.SafeDumper):
    """
    Custom YAML Dumper.
    """
    pass
    pass  # pylint:disable=unnecessary-pass


def default_representer(dumper, data):
    """Default representer"""
    return dumper.represent_str(str(data))


CustomDumper.add_representer(babelfish.Language, default_representer)
CustomDumper.add_representer(babelfish.Country, default_representer)
CustomDumper.add_representer(BitRate, default_representer)
CustomDumper.add_representer(FrameRate, default_representer)
CustomDumper.add_representer(Size, default_representer)


def ordered_dict_representer(dumper, data):
    """OrderedDict representer"""
    return dumper.represent_dict(data)
    return dumper.represent_mapping('tag:yaml.org,2002:map', data.items())


CustomDumper.add_representer(OrderedDict, ordered_dict_representer)
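Deriving `OrderedDictYAMLLoader` from `yaml.SafeLoader` avoids arbitrary Python object construction while still preserving mapping order, and the new `represent_mapping` call keeps `OrderedDict` keys in insertion order when dumping. A round-trip sketch using the classes above (assumes PyYAML and the guessit module layout shown here):

    from collections import OrderedDict

    import yaml

    from guessit.yamlutils import CustomDumper, OrderedDictYAMLLoader

    data = OrderedDict([('title', 'Dark City'), ('year', 1998)])
    # Key order survives the dump/load round trip.
    text = yaml.dump(data, Dumper=CustomDumper, default_flow_style=False)
    assert list(yaml.load(text, OrderedDictYAMLLoader)) == ['title', 'year']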
@@ -4,4 +4,4 @@
Version module
"""
# pragma: no cover
__version__ = '0.9.0'
__version__ = '2.0.1'
217  libs/rebulk/builder.py  Normal file

@@ -0,0 +1,217 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Base builder class for Rebulk
"""
from abc import ABCMeta, abstractmethod
from copy import deepcopy
from logging import getLogger

from six import add_metaclass

from .loose import set_defaults
from .pattern import RePattern, StringPattern, FunctionalPattern

log = getLogger(__name__).log


@add_metaclass(ABCMeta)
class Builder(object):
    """
    Base builder class for patterns
    """

    def __init__(self):
        self._defaults = {}
        self._regex_defaults = {}
        self._string_defaults = {}
        self._functional_defaults = {}
        self._chain_defaults = {}

    def reset(self):
        """
        Reset all defaults.

        :return:
        """
        self.__init__()

    def defaults(self, **kwargs):
        """
        Define default keyword arguments for all patterns
        :param kwargs:
        :type kwargs:
        :return:
        :rtype:
        """
        set_defaults(kwargs, self._defaults, override=True)
        return self

    def regex_defaults(self, **kwargs):
        """
        Define default keyword arguments for functional patterns.
        :param kwargs:
        :type kwargs:
        :return:
        :rtype:
        """
        set_defaults(kwargs, self._regex_defaults, override=True)
        return self

    def string_defaults(self, **kwargs):
        """
        Define default keyword arguments for string patterns.
        :param kwargs:
        :type kwargs:
        :return:
        :rtype:
        """
        set_defaults(kwargs, self._string_defaults, override=True)
        return self

    def functional_defaults(self, **kwargs):
        """
        Define default keyword arguments for functional patterns.
        :param kwargs:
        :type kwargs:
        :return:
        :rtype:
        """
        set_defaults(kwargs, self._functional_defaults, override=True)
        return self

    def chain_defaults(self, **kwargs):
        """
        Define default keyword arguments for patterns chain.
        :param kwargs:
        :type kwargs:
        :return:
        :rtype:
        """
        set_defaults(kwargs, self._chain_defaults, override=True)
        return self

    def build_re(self, *pattern, **kwargs):
        """
        Builds a new regular expression pattern

        :param pattern:
        :type pattern:
        :param kwargs:
        :type kwargs:
        :return:
        :rtype:
        """
        set_defaults(self._regex_defaults, kwargs)
        set_defaults(self._defaults, kwargs)
        return RePattern(*pattern, **kwargs)

    def build_string(self, *pattern, **kwargs):
        """
        Builds a new string pattern

        :param pattern:
        :type pattern:
        :param kwargs:
        :type kwargs:
        :return:
        :rtype:
        """
        set_defaults(self._string_defaults, kwargs)
        set_defaults(self._defaults, kwargs)
        return StringPattern(*pattern, **kwargs)

    def build_functional(self, *pattern, **kwargs):
        """
        Builds a new functional pattern

        :param pattern:
        :type pattern:
        :param kwargs:
        :type kwargs:
        :return:
        :rtype:
        """
        set_defaults(self._functional_defaults, kwargs)
        set_defaults(self._defaults, kwargs)
        return FunctionalPattern(*pattern, **kwargs)

    def build_chain(self, **kwargs):
        """
        Builds a new patterns chain

        :param pattern:
        :type pattern:
        :param kwargs:
        :type kwargs:
        :return:
        :rtype:
        """
        from .chain import Chain
        set_defaults(self._chain_defaults, kwargs)
        set_defaults(self._defaults, kwargs)
        chain = Chain(self, **kwargs)
        chain._defaults = deepcopy(self._defaults)  # pylint: disable=protected-access
        chain._regex_defaults = deepcopy(self._regex_defaults)  # pylint: disable=protected-access
        chain._functional_defaults = deepcopy(self._functional_defaults)  # pylint: disable=protected-access
        chain._string_defaults = deepcopy(self._string_defaults)  # pylint: disable=protected-access
        chain._chain_defaults = deepcopy(self._chain_defaults)  # pylint: disable=protected-access
        return chain

    @abstractmethod
    def pattern(self, *pattern):
        """
        Register a list of Pattern instance
        :param pattern:
        :return:
        """
        pass

    def regex(self, *pattern, **kwargs):
        """
        Add re pattern

        :param pattern:
        :type pattern:
        :return: self
        :rtype: Rebulk
        """
        return self.pattern(self.build_re(*pattern, **kwargs))

    def string(self, *pattern, **kwargs):
        """
        Add string pattern

        :param pattern:
        :type pattern:
        :return: self
        :rtype: Rebulk
        """
        return self.pattern(self.build_string(*pattern, **kwargs))

    def functional(self, *pattern, **kwargs):
        """
        Add functional pattern

        :param pattern:
        :type pattern:
        :return: self
        :rtype: Rebulk
        """
        functional = self.build_functional(*pattern, **kwargs)
        return self.pattern(functional)

    def chain(self, **kwargs):
        """
        Add patterns chain, using configuration of this rebulk

        :param pattern:
        :type pattern:
        :param kwargs:
        :type kwargs:
        :return:
        :rtype:
        """
        chain = self.build_chain(**kwargs)
        self.pattern(chain)
        return chain
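`Builder` factors the `defaults`/`build_*` machinery out of `Rebulk` so that `Chain` can reuse it; a subclass only has to implement `pattern()`. An illustrative subclass (not part of the library) that just collects built patterns in a list:

    from rebulk.builder import Builder


    class ListBuilder(Builder):
        """Hypothetical Builder subclass collecting patterns in a list."""

        def __init__(self):
            super(ListBuilder, self).__init__()
            self.collected = []

        def pattern(self, *pattern):
            self.collected.extend(pattern)
            return self


    builder = ListBuilder()
    builder.defaults(name='word').regex(r'\w+')  # defaults merged into build_re()
    assert builder.collected[0].name == 'word'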
@@ -6,9 +6,10 @@ Chain patterns and handle repetiting capture group
# pylint: disable=super-init-not-called
import itertools

from .loose import call, set_defaults
from .builder import Builder
from .loose import call
from .match import Match, Matches
from .pattern import Pattern, filter_match_kwargs
from .pattern import Pattern, filter_match_kwargs, BasePattern
from .remodule import re


@@ -19,150 +20,46 @@ class _InvalidChainException(Exception):
    pass


class Chain(Pattern):
class Chain(Pattern, Builder):
    """
    Definition of a pattern chain to search for.
    """

    def __init__(self, rebulk, chain_breaker=None, **kwargs):
        call(super(Chain, self).__init__, **kwargs)
    def __init__(self, parent, chain_breaker=None, **kwargs):
        Builder.__init__(self)
        call(Pattern.__init__, self, **kwargs)
        self._kwargs = kwargs
        self._match_kwargs = filter_match_kwargs(kwargs)
        self._defaults = {}
        self._regex_defaults = {}
        self._string_defaults = {}
        self._functional_defaults = {}
        if callable(chain_breaker):
            self.chain_breaker = chain_breaker
        else:
            self.chain_breaker = None
        self.rebulk = rebulk
        self.parent = parent
        self.parts = []

    def defaults(self, **kwargs):
    def pattern(self, *pattern):
        """
        Define default keyword arguments for all patterns
        :param kwargs:
        :type kwargs:
        :return:
        :rtype:
        """
        self._defaults = kwargs
        return self

    def regex_defaults(self, **kwargs):
        """
        Define default keyword arguments for functional patterns.
        :param kwargs:
        :type kwargs:
        :return:
        :rtype:
        """
        self._regex_defaults = kwargs
        return self

    def string_defaults(self, **kwargs):
        """
        Define default keyword arguments for string patterns.
        :param kwargs:
        :type kwargs:
        :return:
        :rtype:
        """
        self._string_defaults = kwargs
        return self

    def functional_defaults(self, **kwargs):
        """
        Define default keyword arguments for functional patterns.
        :param kwargs:
        :type kwargs:
        :return:
        :rtype:
        """
        self._functional_defaults = kwargs
        return self

    def chain(self):
        """
        Add patterns chain, using configuration from this chain

        :return:
        :rtype:
        """
        # pylint: disable=protected-access
        chain = self.rebulk.chain(**self._kwargs)
        chain._defaults = dict(self._defaults)
        chain._regex_defaults = dict(self._regex_defaults)
        chain._functional_defaults = dict(self._functional_defaults)
        chain._string_defaults = dict(self._string_defaults)
        return chain

    def regex(self, *pattern, **kwargs):
        """
        Add re pattern

        :param pattern:
        :type pattern:
        :param kwargs:
        :type kwargs:
        :return:
        :rtype:
        """
        set_defaults(self._kwargs, kwargs)
        set_defaults(self._regex_defaults, kwargs)
        set_defaults(self._defaults, kwargs)
        pattern = self.rebulk.build_re(*pattern, **kwargs)
        part = ChainPart(self, pattern)
        self.parts.append(part)
        return part

    def functional(self, *pattern, **kwargs):
        """
        Add functional pattern

        :param pattern:
        :type pattern:
        :param kwargs:
        :type kwargs:
        :return:
        :rtype:
        """
        set_defaults(self._kwargs, kwargs)
        set_defaults(self._functional_defaults, kwargs)
        set_defaults(self._defaults, kwargs)
        pattern = self.rebulk.build_functional(*pattern, **kwargs)
        part = ChainPart(self, pattern)
        self.parts.append(part)
        return part

    def string(self, *pattern, **kwargs):
        """
        Add string pattern

        :param pattern:
        :type pattern:
        :param kwargs:
        :type kwargs:
        :return:
        :rtype:
        """
        set_defaults(self._kwargs, kwargs)
        set_defaults(self._functional_defaults, kwargs)
        set_defaults(self._defaults, kwargs)
        pattern = self.rebulk.build_string(*pattern, **kwargs)
        part = ChainPart(self, pattern)
        if not pattern:
            raise ValueError("One pattern should be given to the chain")
        if len(pattern) > 1:
            raise ValueError("Only one pattern can be given to the chain")
        part = ChainPart(self, pattern[0])
        self.parts.append(part)
        return part

    def close(self):
        """
        Close chain builder to continue registering other pattern

        :return:
        :rtype:
        Deeply close the chain
        :return: Rebulk instance
        """
        return self.rebulk
        parent = self.parent
        while isinstance(parent, Chain):
            parent = parent.parent
        return parent

    def _match(self, pattern, input_string, context=None):
        # pylint: disable=too-many-locals,too-many-nested-blocks

@@ -173,42 +70,20 @@ class Chain(Pattern):
        chain_found = False
        current_chain_matches = []
        valid_chain = True
        is_chain_start = True
        for chain_part in self.parts:
            try:
                chain_part_matches, raw_chain_part_matches = Chain._match_chain_part(is_chain_start, chain_part,
                                                                                     chain_input_string,
                                                                                     context)

                Chain._fix_matches_offset(chain_part_matches, input_string, offset)
                Chain._fix_matches_offset(raw_chain_part_matches, input_string, offset)

                if raw_chain_part_matches:
                    grouped_matches_dict = dict()
                    for match_index, match in itertools.groupby(chain_part_matches,
                                                                lambda m: m.match_index):
                        grouped_matches_dict[match_index] = list(match)

                    grouped_raw_matches_dict = dict()
                    for match_index, raw_match in itertools.groupby(raw_chain_part_matches,
                                                                    lambda m: m.match_index):
                        grouped_raw_matches_dict[match_index] = list(raw_match)

                    for match_index, grouped_raw_matches in grouped_raw_matches_dict.items():
                        chain_found = True
                        offset = grouped_raw_matches[-1].raw_end
                        chain_input_string = input_string[offset:]
                        if not chain_part.is_hidden:
                            grouped_matches = grouped_matches_dict.get(match_index, [])
                            if self._chain_breaker_eval(current_chain_matches + grouped_matches):
                                current_chain_matches.extend(grouped_matches)
                chain_part_matches, raw_chain_part_matches = chain_part.matches(chain_input_string,
                                                                                context,
                                                                                with_raw_matches=True)

                chain_found, chain_input_string, offset = \
                    self._to_next_chain_part(chain_part, chain_part_matches, raw_chain_part_matches, chain_found,
                                             input_string, chain_input_string, offset, current_chain_matches)
            except _InvalidChainException:
                valid_chain = False
                if current_chain_matches:
                    offset = current_chain_matches[0].raw_end
                break
            is_chain_start = False
            if not chain_found:
                break
        if current_chain_matches and valid_chain:

@@ -217,38 +92,66 @@ class Chain(Pattern):

        return chain_matches

    def _match_parent(self, match, yield_parent):
    def _to_next_chain_part(self, chain_part, chain_part_matches, raw_chain_part_matches, chain_found,
                            input_string, chain_input_string, offset, current_chain_matches):
        Chain._fix_matches_offset(chain_part_matches, input_string, offset)
        Chain._fix_matches_offset(raw_chain_part_matches, input_string, offset)

        if raw_chain_part_matches:
            grouped_matches_dict = self._group_by_match_index(chain_part_matches)
            grouped_raw_matches_dict = self._group_by_match_index(raw_chain_part_matches)

            for match_index, grouped_raw_matches in grouped_raw_matches_dict.items():
                chain_found = True
                offset = grouped_raw_matches[-1].raw_end
                chain_input_string = input_string[offset:]

                if not chain_part.is_hidden:
                    grouped_matches = grouped_matches_dict.get(match_index, [])
                    if self._chain_breaker_eval(current_chain_matches + grouped_matches):
                        current_chain_matches.extend(grouped_matches)
        return chain_found, chain_input_string, offset

    def _process_match(self, match, match_index, child=False):
        """
        Handle a parent match
        Handle a match
        :param match:
        :type match:
        :param yield_parent:
        :type yield_parent:
        :param match_index:
        :type match_index:
        :param child:
        :type child:
        :return:
        :rtype:
        """
        ret = super(Chain, self)._match_parent(match, yield_parent)
        original_children = Matches(match.children)
        original_end = match.end
        while not ret and match.children:
            last_pattern = match.children[-1].pattern
            last_pattern_children = [child for child in match.children if child.pattern == last_pattern]
            last_pattern_groups_iter = itertools.groupby(last_pattern_children, lambda child: child.match_index)
            last_pattern_groups = {}
            for index, matches in last_pattern_groups_iter:
                last_pattern_groups[index] = list(matches)
        # pylint: disable=too-many-locals
        ret = super(Chain, self)._process_match(match, match_index, child=child)
        if ret:
            return True

        for index in reversed(list(last_pattern_groups)):
            last_matches = list(last_pattern_groups[index])
            for last_match in last_matches:
                match.children.remove(last_match)
            match.end = match.children[-1].end if match.children else match.start
            ret = super(Chain, self)._match_parent(match, yield_parent)
            if ret:
                return True
        match.children = original_children
        match.end = original_end
        return ret
        if match.children:
            last_pattern = match.children[-1].pattern
            last_pattern_groups = self._group_by_match_index(
                [child_ for child_ in match.children if child_.pattern == last_pattern]
            )

            if last_pattern_groups:
                original_children = Matches(match.children)
                original_end = match.end

                for index in reversed(list(last_pattern_groups)):
                    last_matches = last_pattern_groups[index]
                    for last_match in last_matches:
                        match.children.remove(last_match)
                    match.end = match.children[-1].end if match.children else match.start
                    ret = super(Chain, self)._process_match(match, match_index, child=child)
                    if ret:
                        return True

                match.children = original_children
                match.end = original_end

        return False

    def _build_chain_match(self, current_chain_matches, input_string):
        start = None

@@ -282,46 +185,11 @@ class Chain(Pattern):
        Chain._fix_matches_offset(chain_part_match.children, input_string, offset)

    @staticmethod
    def _match_chain_part(is_chain_start, chain_part, chain_input_string, context):
        chain_part_matches, raw_chain_part_matches = chain_part.pattern.matches(chain_input_string, context,
                                                                                with_raw_matches=True)
        chain_part_matches = Chain._truncate_chain_part_matches(is_chain_start, chain_part_matches, chain_part,
                                                                chain_input_string)
        raw_chain_part_matches = Chain._truncate_chain_part_matches(is_chain_start, raw_chain_part_matches, chain_part,
                                                                    chain_input_string)

        Chain._validate_chain_part_matches(raw_chain_part_matches, chain_part)
        return chain_part_matches, raw_chain_part_matches

    @staticmethod
    def _truncate_chain_part_matches(is_chain_start, chain_part_matches, chain_part, chain_input_string):
        if not chain_part_matches:
            return chain_part_matches

        if not is_chain_start:
            separator = chain_input_string[0:chain_part_matches[0].initiator.raw_start]
            if separator:
                return []

        j = 1
        for i in range(0, len(chain_part_matches) - 1):
            separator = chain_input_string[chain_part_matches[i].initiator.raw_end:
                                           chain_part_matches[i + 1].initiator.raw_start]
            if separator:
                break
            j += 1
        truncated = chain_part_matches[:j]
        if chain_part.repeater_end is not None:
            truncated = [m for m in truncated if m.match_index < chain_part.repeater_end]
        return truncated

    @staticmethod
    def _validate_chain_part_matches(chain_part_matches, chain_part):
        max_match_index = -1
        if chain_part_matches:
            max_match_index = max([m.match_index for m in chain_part_matches])
        if max_match_index + 1 < chain_part.repeater_start:
            raise _InvalidChainException
    def _group_by_match_index(matches):
        grouped_matches_dict = dict()
        for match_index, match in itertools.groupby(matches, lambda m: m.match_index):
            grouped_matches_dict[match_index] = list(match)
        return grouped_matches_dict

    @property
    def match_options(self):

@@ -338,7 +206,7 @@ class Chain(Pattern):
        return "<%s%s:%s>" % (self.__class__.__name__, defined, self.parts)


class ChainPart(object):
class ChainPart(BasePattern):
    """
    Part of a pattern chain.
    """

@@ -350,6 +218,51 @@ class ChainPart(object):
        self.repeater_end = 1
        self._hidden = False

    @property
    def _is_chain_start(self):
        return self._chain.parts[0] == self

    def matches(self, input_string, context=None, with_raw_matches=False):
        matches, raw_matches = self.pattern.matches(input_string, context=context, with_raw_matches=True)

        matches = self._truncate_repeater(matches, input_string)
        raw_matches = self._truncate_repeater(raw_matches, input_string)

        self._validate_repeater(raw_matches)

        if with_raw_matches:
            return matches, raw_matches

        return matches

    def _truncate_repeater(self, matches, input_string):
        if not matches:
            return matches

        if not self._is_chain_start:
            separator = input_string[0:matches[0].initiator.raw_start]
            if separator:
                return []

        j = 1
        for i in range(0, len(matches) - 1):
            separator = input_string[matches[i].initiator.raw_end:
                                     matches[i + 1].initiator.raw_start]
            if separator:
                break
            j += 1
        truncated = matches[:j]
        if self.repeater_end is not None:
            truncated = [m for m in truncated if m.match_index < self.repeater_end]
        return truncated

    def _validate_repeater(self, matches):
        max_match_index = -1
        if matches:
            max_match_index = max([m.match_index for m in matches])
        if max_match_index + 1 < self.repeater_start:
            raise _InvalidChainException

    def chain(self):
        """
        Add patterns chain, using configuration from this chain
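`Chain` now mixes in `Builder`, repeater truncation and validation move onto `ChainPart` behind the `BasePattern.matches` contract, and `close()` walks `parent` links until it leaves the chain. A minimal check mirroring the existing `test_chain_close` (illustrative only):

    from rebulk import Rebulk

    rebulk = Rebulk()
    # close() follows the parent links introduced by Builder, so the fluent
    # expression hands control back to the root Rebulk instance.
    assert rebulk.chain().close() == rebulk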
@@ -15,9 +15,19 @@ def formatters(*chained_formatters):
    :return:
    :rtype:
    """

    def formatters_chain(input_string):  # pylint:disable=missing-docstring
        for chained_formatter in chained_formatters:
            input_string = chained_formatter(input_string)
        return input_string

    return formatters_chain


def default_formatter(input_string):
    """
    Default formatter
    :param input_string:
    :return:
    """
    return input_string
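`formatters` composes several formatter callables into a single one applied in declaration order, and `default_formatter` replaces the anonymous `lambda x: x` so the default is a named, importable function. Quick sketch:

    from rebulk.formatters import formatters

    clean = formatters(str.strip, str.lower)  # applied left to right
    assert clean('  Some.Title  ') == 'some.title'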
@@ -3,7 +3,7 @@
"""
Introspect rebulk object to retrieve capabilities.
"""
from abc import ABCMeta, abstractproperty
from abc import ABCMeta, abstractmethod
from collections import defaultdict

import six

@@ -16,7 +16,8 @@ class Description(object):
    """
    Abstract class for a description.
    """
    @abstractproperty
    @property
    @abstractmethod
    def properties(self):  # pragma: no cover
        """
        Properties of described object.
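`abstractproperty` has been deprecated since Python 3.3; stacking `@property` over `@abstractmethod` is the modern spelling with identical enforcement. The idiom in isolation (class names here are illustrative, not from the diff):

    from abc import ABCMeta, abstractmethod

    import six


    @six.add_metaclass(ABCMeta)
    class Described(object):
        @property
        @abstractmethod
        def properties(self):
            """Properties of described object."""


    class Static(Described):
        @property
        def properties(self):
            return {'title': [None]}


    assert Static().properties == {'title': [None]}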
@@ -3,8 +3,18 @@
"""
Various utilities functions
"""
import inspect

import sys

from inspect import isclass
try:
    from inspect import getfullargspec as getargspec

    _fullargspec_supported = True
except ImportError:
    _fullargspec_supported = False
    from inspect import getargspec

from .utils import is_iterable

if sys.version_info < (3, 4, 0):  # pragma: no cover

@@ -45,8 +55,8 @@ def call(function, *args, **kwargs):
    :return: sale vakye as default function call
    :rtype: object
    """
    func = constructor_args if inspect.isclass(function) else function_args
    call_args, call_kwargs = func(function, *args, **kwargs)
    func = constructor_args if isclass(function) else function_args
    call_args, call_kwargs = func(function, *args, ignore_unused=True, **kwargs)  # @see #20
    return function(*call_args, **call_kwargs)


@@ -63,7 +73,7 @@ def function_args(callable_, *args, **kwargs):
    :return: (args, kwargs) matching the function signature
    :rtype: tuple
    """
    argspec = inspect.getargspec(callable_)  # pylint:disable=deprecated-method
    argspec = getargspec(callable_)  # pylint:disable=deprecated-method
    return argspec_args(argspec, False, *args, **kwargs)


@@ -80,7 +90,7 @@ def constructor_args(class_, *args, **kwargs):
    :return: (args, kwargs) matching the function signature
    :rtype: tuple
    """
    argspec = inspect.getargspec(_constructor(class_))  # pylint:disable=deprecated-method
    argspec = getargspec(_constructor(class_))  # pylint:disable=deprecated-method
    return argspec_args(argspec, True, *args, **kwargs)


@@ -99,7 +109,7 @@ def argspec_args(argspec, constructor, *args, **kwargs):
    :return: (args, kwargs) matching the function signature
    :rtype: tuple
    """
    if argspec.keywords:
    if argspec.varkw:
        call_kwarg = kwargs
    else:
        call_kwarg = dict((k, kwargs[k]) for k in kwargs if k in argspec.args)  # Python 2.6 dict comprehension

@@ -110,6 +120,36 @@ def argspec_args(argspec, constructor, *args, **kwargs):
    return call_args, call_kwarg


if not _fullargspec_supported:
    def argspec_args_legacy(argspec, constructor, *args, **kwargs):
        """
        Return (args, kwargs) matching the argspec object

        :param argspec: argspec to use
        :type argspec: argspec
        :param constructor: is it a constructor ?
        :type constructor: bool
        :param args:
        :type args:
        :param kwargs:
        :type kwargs:
        :return: (args, kwargs) matching the function signature
        :rtype: tuple
        """
        if argspec.keywords:
            call_kwarg = kwargs
        else:
            call_kwarg = dict((k, kwargs[k]) for k in kwargs if k in argspec.args)  # Python 2.6 dict comprehension
        if argspec.varargs:
            call_args = args
        else:
            call_args = args[:len(argspec.args) - (1 if constructor else 0)]
        return call_args, call_kwarg


    argspec_args = argspec_args_legacy


def ensure_list(param):
    """
    Retrieves a list from given parameter.

@@ -177,9 +217,12 @@ def filter_index(collection, predicate=None, index=None):
    return collection


def set_defaults(defaults, kwargs):
def set_defaults(defaults, kwargs, override=False):
    """
    Set defaults from defaults dict to kwargs dict

    :param override:
    :type override:
    :param defaults:
    :type defaults:
    :param kwargs:

@@ -187,12 +230,13 @@ def set_defaults(defaults, kwargs):
    :return:
    :rtype:
    """
    if 'clear' in defaults.keys() and defaults.pop('clear'):
        kwargs.clear()
    for key, value in defaults.items():
        if key not in kwargs and value is not None:
        if key in kwargs:
            if isinstance(value, list) and isinstance(kwargs[key], list):
                kwargs[key] = list(value) + kwargs[key]
            elif isinstance(value, dict) and isinstance(kwargs[key], dict):
                set_defaults(value, kwargs[key])
        if key not in kwargs or override:
            kwargs[key] = value
        elif isinstance(value, list) and isinstance(kwargs[key], list):
            kwargs[key] = list(value) + kwargs[key]
        elif isinstance(value, dict) and isinstance(kwargs[key], dict):
            set_defaults(value, kwargs[key])
        elif key in kwargs and value is None:
            kwargs[key] = None
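With the reworked `set_defaults`, list defaults are prepended, dict defaults merge recursively, and `override=True` (as used by the `Builder.*_defaults` methods) lets later calls replace scalar values instead of only filling gaps. A sketch of the non-override semantics, following the code above:

    from rebulk.loose import set_defaults

    kwargs = {'tags': ['chain'], 'formatter': {'episode': int}}
    set_defaults({'tags': ['SxxExx'], 'formatter': {'version': int}, 'name': 'ep'}, kwargs)
    assert kwargs['tags'] == ['SxxExx', 'chain']                    # lists are prepended
    assert kwargs['formatter'] == {'episode': int, 'version': int}  # dicts merge recursively
    assert kwargs['name'] == 'ep'                                   # missing keys are filled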
@@ -5,7 +5,11 @@ Classes and functions related to matches
"""
import copy
import itertools
from collections import defaultdict, MutableSequence
from collections import defaultdict
try:
    from collections.abc import MutableSequence
except ImportError:
    from collections import MutableSequence

try:
    from collections import OrderedDict  # pylint:disable=ungrouped-imports

@@ -778,9 +782,9 @@ class Match(object):
                right.start = end
                if right:
                    ret.append(right)
            elif end <= current.end and end > current.start:
            elif current.end >= end > current.start:
                current.start = end
            elif start >= current.start and start < current.end:
            elif current.start <= start < current.end:
                current.end = start
        return filter_index(ret, predicate, index)


@@ -811,6 +815,24 @@ class Match(object):

        return filter_index(ret, predicate, index)

    def tagged(self, *tags):
        """
        Check if this match has at least one of the provided tags

        :param tags:
        :return: True if at least one tag is defined, False otherwise.
        """
        return any(tag in self.tags for tag in tags)

    def named(self, *names):
        """
        Check if one of the children match has one of the provided name

        :param names:
        :return: True if at least one child is named with a given name is defined, False otherwise.
        """
        return any(name in self.names for name in names)

    def __len__(self):
        return self.end - self.start
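`Match.tagged` and `Match.named` mirror the helpers of the same name on `Matches`, answering "does this match carry one of these tags / child names". Illustrative use, constructing a bare `Match` directly (in real use matches come from patterns; the keyword arguments assumed here follow the `Match` constructor):

    from rebulk.match import Match

    match = Match(0, 6, input_string='lakers', name='team', tags=['weak', 'nba'])
    assert match.tagged('weak', 'missing')  # any one matching tag is enough
    assert not match.tagged('missing')
    assert len(match) == 6                  # the new __len__ is end - start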
@@ -10,14 +10,39 @@ from abc import ABCMeta, abstractmethod, abstractproperty
import six

from . import debug
from .formatters import default_formatter
from .loose import call, ensure_list, ensure_dict
from .match import Match
from .remodule import re, REGEX_AVAILABLE
from .utils import find_all, is_iterable, get_first_defined
from .validators import allways_true


@six.add_metaclass(ABCMeta)
class Pattern(object):
class BasePattern(object):
    """
    Base class for Pattern like objects
    """

    @abstractmethod
    def matches(self, input_string, context=None, with_raw_matches=False):
        """
        Computes all matches for a given input

        :param input_string: the string to parse
        :type input_string: str
        :param context: the context
        :type context: dict
        :param with_raw_matches: should return details
        :type with_raw_matches: dict
        :return: matches based on input_string for this pattern
        :rtype: iterator[Match]
        """
        pass


@six.add_metaclass(ABCMeta)
class Pattern(BasePattern):
    """
    Definition of a particular pattern to search for.
    """

@@ -25,7 +50,7 @@ class Pattern(object):
    def __init__(self, name=None, tags=None, formatter=None, value=None, validator=None, children=False, every=False,
                 private_parent=False, private_children=False, private=False, private_names=None, ignore_names=None,
                 marker=False, format_all=False, validate_all=False, disabled=lambda context: False, log_level=None,
                 properties=None, post_processor=None, **kwargs):
                 properties=None, post_processor=None, pre_match_processor=None, post_match_processor=None, **kwargs):
        """
        :param name: Name of this pattern
        :type name: str

@@ -66,15 +91,19 @@ class Pattern(object):
        :type disabled: bool|function
        :param log_lvl: Log level associated to this pattern
        :type log_lvl: int
        :param post_process: Post processing function
        :param post_processor: Post processing function
        :type post_processor: func
        :param pre_match_processor: Pre match processing function
        :type pre_match_processor: func
        :param post_match_processor: Post match processing function
        :type post_match_processor: func
        """
        # pylint:disable=too-many-locals,unused-argument
        self.name = name
        self.tags = ensure_list(tags)
        self.formatters, self._default_formatter = ensure_dict(formatter, lambda x: x)
        self.formatters, self._default_formatter = ensure_dict(formatter, default_formatter)
        self.values, self._default_value = ensure_dict(value, None)
        self.validators, self._default_validator = ensure_dict(validator, lambda match: True)
        self.validators, self._default_validator = ensure_dict(validator, allways_true)
        self.every = every
        self.children = children
        self.private = private

@@ -96,6 +125,14 @@ class Pattern(object):
            self.post_processor = None
        else:
            self.post_processor = post_processor
        if not callable(pre_match_processor):
            self.pre_match_processor = None
        else:
            self.pre_match_processor = pre_match_processor
        if not callable(post_match_processor):
            self.post_match_processor = None
        else:
            self.post_match_processor = post_match_processor

    @property
    def log_level(self):

@@ -106,83 +143,6 @@ class Pattern(object):
        """
        return self._log_level if self._log_level is not None else debug.LOG_LEVEL

    def _yield_children(self, match):
        """
        Does this match has children
        :param match:
        :type match:
        :return:
        :rtype:
        """
        return match.children and (self.children or self.every)

    def _yield_parent(self):
        """
        Does this mat
        :param match:
        :type match:
        :return:
        :rtype:
        """
        return not self.children or self.every

    def _match_parent(self, match, yield_parent):
        """
        Handle a parent match
        :param match:
        :type match:
        :param yield_parent:
        :type yield_parent:
        :return:
        :rtype:
        """
        if not match or match.value == "":
            return False

        pattern_value = get_first_defined(self.values, [match.name, '__parent__', None],
                                          self._default_value)
        if pattern_value:
            match.value = pattern_value

        if yield_parent or self.format_all:
            match.formatter = get_first_defined(self.formatters, [match.name, '__parent__', None],
                                                self._default_formatter)
        if yield_parent or self.validate_all:
            validator = get_first_defined(self.validators, [match.name, '__parent__', None],
                                          self._default_validator)
            if validator and not validator(match):
                return False
        return True

    def _match_child(self, child, yield_children):
        """
        Handle a children match
        :param child:
        :type child:
        :param yield_children:
        :type yield_children:
        :return:
        :rtype:
        """
        if not child or child.value == "":
            return False

        pattern_value = get_first_defined(self.values, [child.name, '__children__', None],
                                          self._default_value)
        if pattern_value:
            child.value = pattern_value

        if yield_children or self.format_all:
            child.formatter = get_first_defined(self.formatters, [child.name, '__children__', None],
                                                self._default_formatter)

        if yield_children or self.validate_all:
            validator = get_first_defined(self.validators, [child.name, '__children__', None],
                                          self._default_validator)
            if validator and not validator(child):
                return False
        return True

    def matches(self, input_string, context=None, with_raw_matches=False):
        """
        Computes all matches for a given input

@@ -200,41 +160,168 @@ class Pattern(object):

        matches = []
        raw_matches = []

        for pattern in self.patterns:
            yield_parent = self._yield_parent()
            match_index = -1
            match_index = 0
            for match in self._match(pattern, input_string, context):
                match_index += 1
                match.match_index = match_index
                raw_matches.append(match)
                yield_children = self._yield_children(match)
                if not self._match_parent(match, yield_parent):
                    continue
                validated = True
                for child in match.children:
                    if not self._match_child(child, yield_children):
                        validated = False
                        break
                if validated:
                    if self.private_parent:
                        match.private = True
                    if self.private_children:
                        for child in match.children:
                            child.private = True
                    if yield_parent or self.private_parent:
                        matches.append(match)
                    if yield_children or self.private_children:
                        for child in match.children:
                            child.match_index = match_index
                            matches.append(child)
        matches = self._matches_post_process(matches)
        self._matches_privatize(matches)
        self._matches_ignore(matches)
                matches.extend(self._process_matches(match, match_index))
                match_index += 1

        matches = self._post_process_matches(matches)

        if with_raw_matches:
            return matches, raw_matches
        return matches

    def _matches_post_process(self, matches):
    @property
    def _should_include_children(self):
        """
        Check if children matches from this pattern should be included in matches results.
        :param match:
        :type match:
        :return:
        :rtype:
        """
        return self.children or self.every

    @property
    def _should_include_parent(self):
        """
        Check is a match from this pattern should be included in matches results.
        :param match:
        :type match:
        :return:
        :rtype:
        """
        return not self.children or self.every

    @staticmethod
    def _match_config_property_keys(match, child=False):
        if match.name:
            yield match.name
        if child:
            yield '__children__'
        else:
            yield '__parent__'
        yield None

    @staticmethod
    def _process_match_index(match, match_index):
        """
        Process match index from this pattern process state.

        :param match:
        :return:
        """
        match.match_index = match_index

    def _process_match_private(self, match, child=False):
        """
        Process match privacy from this pattern configuration.

        :param match:
        :param child:
        :return:
        """

        if match.name and match.name in self.private_names or \
                not child and self.private_parent or \
                child and self.private_children:
            match.private = True

    def _process_match_value(self, match, child=False):
        """
        Process match value from this pattern configuration.
        :param match:
        :return:
        """
        keys = self._match_config_property_keys(match, child=child)
        pattern_value = get_first_defined(self.values, keys, self._default_value)
        if pattern_value:
            match.value = pattern_value

    def _process_match_formatter(self, match, child=False):
        """
        Process match formatter from this pattern configuration.

        :param match:
        :return:
        """
        included = self._should_include_children if child else self._should_include_parent
        if included or self.format_all:
            keys = self._match_config_property_keys(match, child=child)
            match.formatter = get_first_defined(self.formatters, keys, self._default_formatter)

    def _process_match_validator(self, match, child=False):
        """
        Process match validation from this pattern configuration.

        :param match:
        :return: True if match is validated by the configured validator, False otherwise.
        """
        included = self._should_include_children if child else self._should_include_parent
        if included or self.validate_all:
            keys = self._match_config_property_keys(match, child=child)
            validator = get_first_defined(self.validators, keys, self._default_validator)
            if validator and not validator(match):
                return False
        return True

    def _process_match(self, match, match_index, child=False):
        """
        Process match from this pattern by setting all properties from defined configuration
        (index, private, value, formatter, validator, ...).

        :param match:
        :type match:
        :return: True if match is validated by the configured validator, False otherwise.
        :rtype:
        """
        self._process_match_index(match, match_index)
        self._process_match_private(match, child)
        self._process_match_value(match, child)
        self._process_match_formatter(match, child)
        return self._process_match_validator(match, child)

    @staticmethod
    def _process_match_processor(match, processor):
        if processor:
            ret = processor(match)
            if ret is not None:
                return ret
        return match

    def _process_matches(self, match, match_index):
        """
        Process and generate all matches for the given unprocessed match.
        :param match:
        :param match_index:
        :return: Process and dispatched matches.
        """
        match = self._process_match_processor(match, self.pre_match_processor)
        if not match:
            return

        if not self._process_match(match, match_index):
            return

        for child in match.children:
            if not self._process_match(child, match_index, child=True):
                return

        match = self._process_match_processor(match, self.post_match_processor)
        if not match:
            return

        if (self._should_include_parent or self.private_parent) and match.name not in self.ignore_names:
            yield match
        if self._should_include_children or self.private_children:
            children = [x for x in match.children if x.name not in self.ignore_names]
            for child in children:
                yield child

    def _post_process_matches(self, matches):
        """
        Post process matches with user defined function
        :param matches:

@@ -246,32 +333,6 @@ class Pattern(object):
            return self.post_processor(matches, self)
        return matches

    def _matches_privatize(self, matches):
        """
        Mark matches included in private_names with private flag.
        :param matches:
        :type matches:
        :return:
        :rtype:
        """
        if self.private_names:
            for match in matches:
                if match.name in self.private_names:
                    match.private = True

    def _matches_ignore(self, matches):
        """
        Ignore matches included in ignore_names.
        :param matches:
        :type matches:
        :return:
        :rtype:
        """
        if self.ignore_names:
            for match in list(matches):
                if match.name in self.ignore_names:
                    matches.remove(match)

    @abstractproperty
    def patterns(self):  # pragma: no cover
        """

@@ -306,7 +367,7 @@ class Pattern(object):
    @abstractmethod
    def _match(self, pattern, input_string, context=None):  # pragma: no cover
        """
        Computes all matches for a given pattern and input
        Computes all unprocess matches for a given pattern and input.

        :param pattern: the pattern to use
        :param input_string: the string to parse

@@ -350,7 +411,9 @@ class StringPattern(Pattern):

    def _match(self, pattern, input_string, context=None):
        for index in find_all(input_string, pattern, **self._kwargs):
            yield Match(index, index + len(pattern), pattern=self, input_string=input_string, **self._match_kwargs)
            match = Match(index, index + len(pattern), pattern=self, input_string=input_string, **self._match_kwargs)
            if match:
                yield match


class RePattern(Pattern):

@@ -411,15 +474,18 @@ class RePattern(Pattern):
                    for start, end in match_object.spans(i):
                        child_match = Match(start, end, name=name, parent=main_match, pattern=self,
                                            input_string=input_string, **self._children_match_kwargs)
                        main_match.children.append(child_match)
                        if child_match:
                            main_match.children.append(child_match)
                else:
                    start, end = match_object.span(i)
                    if start > -1 and end > -1:
                        child_match = Match(start, end, name=name, parent=main_match, pattern=self,
                                            input_string=input_string, **self._children_match_kwargs)
                        main_match.children.append(child_match)
                        if child_match:
                            main_match.children.append(child_match)

            yield main_match
            if main_match:
                yield main_match


class FunctionalPattern(Pattern):

@@ -457,14 +523,18 @@ class FunctionalPattern(Pattern):
                if self._match_kwargs:
                    options = self._match_kwargs.copy()
                    options.update(args)
                    yield Match(pattern=self, input_string=input_string, **options)
                    match = Match(pattern=self, input_string=input_string, **options)
                    if match:
                        yield match
                else:
                    kwargs = self._match_kwargs
                    if isinstance(args[-1], dict):
                        kwargs = dict(kwargs)
                        kwargs.update(args[-1])
                        args = args[:-1]
                    yield Match(*args, pattern=self, input_string=input_string, **kwargs)
                    match = Match(*args, pattern=self, input_string=input_string, **kwargs)
                    if match:
                        yield match


def filter_match_kwargs(kwargs, children=False):
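The monolithic `matches()` loop is now decomposed into the `_process_match*` steps above, and the new `pre_match_processor`/`post_match_processor` hooks run per match: returning `None` keeps the match unchanged, a replacement match substitutes it, and any other falsy value drops it (see `_process_match_processor`). Sketch:

    from rebulk.pattern import StringPattern

    def drop_short(match):
        # Falsy return drops the match; None keeps it unchanged.
        return False if len(match) < 4 else None

    pattern = StringPattern('la', 'lakers', pre_match_processor=drop_short)
    assert [m.value for m in pattern.matches('the lakers are from la')] == ['lakers']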
@@ -30,7 +30,7 @@ def _default_conflict_solver(match, conflicting_match):
    """
    if len(conflicting_match.initiator) < len(match.initiator):
        return conflicting_match
    elif len(match.initiator) < len(conflicting_match.initiator):
    if len(match.initiator) < len(conflicting_match.initiator):
        return match
    return None
|
@ -5,20 +5,16 @@ Entry point functions and classes for Rebulk
|
|||
"""
|
||||
from logging import getLogger
|
||||
|
||||
from .builder import Builder
|
||||
from .match import Matches
|
||||
|
||||
from .pattern import RePattern, StringPattern, FunctionalPattern
|
||||
from .chain import Chain
|
||||
|
||||
from .processors import ConflictSolver, PrivateRemover
|
||||
from .loose import set_defaults
|
||||
from .utils import extend_safe
|
||||
from .rules import Rules
|
||||
from .utils import extend_safe
|
||||
|
||||
log = getLogger(__name__).log
|
||||
|
||||
|
||||
class Rebulk(object):
|
||||
class Rebulk(Builder):
|
||||
r"""
|
||||
Regular expression, string and function based patterns are declared in a ``Rebulk`` object. It use a fluent API to
|
||||
chain ``string``, ``regex``, and ``functional`` methods to define various patterns types.
|
||||
|
@ -44,6 +40,7 @@ class Rebulk(object):
|
|||
>>> bulk.matches("the lakers are from la")
|
||||
[<lakers:(4, 10)>, <la:(20, 22)>]
|
||||
"""
|
||||
|
||||
# pylint:disable=protected-access
|
||||
|
||||
def __init__(self, disabled=lambda context: False, default_rules=True):
|
||||
|
@ -56,6 +53,7 @@ class Rebulk(object):
|
|||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
super(Rebulk, self).__init__()
|
||||
if not callable(disabled):
|
||||
self.disabled = lambda context: disabled
|
||||
else:
|
||||
|
@ -64,11 +62,6 @@ class Rebulk(object):
|
|||
self._rules = Rules()
|
||||
if default_rules:
|
||||
self.rules(ConflictSolver, PrivateRemover)
|
||||
self._defaults = {}
|
||||
self._regex_defaults = {}
|
||||
self._string_defaults = {}
|
||||
self._functional_defaults = {}
|
||||
self._chain_defaults = {}
|
||||
self._rebulks = []
|
||||
|
||||
def pattern(self, *pattern):
|
||||
|
@ -83,172 +76,6 @@ class Rebulk(object):
|
|||
self._patterns.extend(pattern)
|
||||
return self
|
||||
|
||||
def defaults(self, **kwargs):
|
||||
"""
|
||||
Define default keyword arguments for all patterns
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
self._defaults = kwargs
|
||||
return self
|
||||
|
||||
def regex_defaults(self, **kwargs):
|
||||
"""
|
||||
Define default keyword arguments for functional patterns.
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
self._regex_defaults = kwargs
|
||||
return self
|
||||
|
||||
def regex(self, *pattern, **kwargs):
|
||||
"""
|
||||
Add re pattern
|
||||
|
||||
:param pattern:
|
||||
:type pattern:
|
||||
:return: self
|
||||
:rtype: Rebulk
|
||||
"""
|
||||
self.pattern(self.build_re(*pattern, **kwargs))
|
||||
return self
|
||||
|
||||
def build_re(self, *pattern, **kwargs):
|
||||
"""
|
||||
Builds a new regular expression pattern
|
||||
|
||||
:param pattern:
|
||||
:type pattern:
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
set_defaults(self._regex_defaults, kwargs)
|
||||
set_defaults(self._defaults, kwargs)
|
||||
return RePattern(*pattern, **kwargs)
|
||||
|
||||
def string_defaults(self, **kwargs):
|
||||
"""
|
||||
Define default keyword arguments for string patterns.
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
self._string_defaults = kwargs
|
||||
return self
|
||||
|
||||
def string(self, *pattern, **kwargs):
|
||||
"""
|
||||
Add string pattern
|
||||
|
||||
:param pattern:
|
||||
:type pattern:
|
||||
:return: self
|
||||
:rtype: Rebulk
|
||||
"""
|
||||
self.pattern(self.build_string(*pattern, **kwargs))
|
||||
return self
|
||||
|
||||
def build_string(self, *pattern, **kwargs):
|
||||
"""
|
||||
Builds a new string pattern
|
||||
|
||||
:param pattern:
|
||||
:type pattern:
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
set_defaults(self._string_defaults, kwargs)
|
||||
set_defaults(self._defaults, kwargs)
|
||||
return StringPattern(*pattern, **kwargs)
|
||||
|
||||
def functional_defaults(self, **kwargs):
|
||||
"""
|
||||
Define default keyword arguments for functional patterns.
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
self._functional_defaults = kwargs
|
||||
return self
|
||||
|
||||
def functional(self, *pattern, **kwargs):
|
||||
"""
|
||||
Add functional pattern
|
||||
|
||||
:param pattern:
|
||||
:type pattern:
|
||||
:return: self
|
||||
:rtype: Rebulk
|
||||
"""
|
||||
self.pattern(self.build_functional(*pattern, **kwargs))
|
||||
return self
|
||||
|
||||
def build_functional(self, *pattern, **kwargs):
|
||||
"""
|
||||
Builds a new functional pattern
|
||||
|
||||
:param pattern:
|
||||
:type pattern:
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
set_defaults(self._functional_defaults, kwargs)
|
||||
set_defaults(self._defaults, kwargs)
|
||||
return FunctionalPattern(*pattern, **kwargs)
|
||||
|
||||
def chain_defaults(self, **kwargs):
|
||||
"""
|
||||
Define default keyword arguments for patterns chain.
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
self._chain_defaults = kwargs
|
||||
return self
|
||||
|
||||
def chain(self, **kwargs):
|
||||
"""
|
||||
Add patterns chain, using configuration of this rebulk
|
||||
|
||||
:param pattern:
|
||||
:type pattern:
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
chain = self.build_chain(**kwargs)
|
||||
self._patterns.append(chain)
|
||||
return chain
|
||||
|
||||
def build_chain(self, **kwargs):
|
||||
"""
|
||||
Builds a new patterns chain
|
||||
|
||||
:param pattern:
|
||||
:type pattern:
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
set_defaults(self._chain_defaults, kwargs)
|
||||
set_defaults(self._defaults, kwargs)
|
||||
return Chain(self, **kwargs)
|
||||
|
||||
def rules(self, *rules):
|
||||
"""
|
||||
Add rules as a module, class or instance.
|
||||
|
|
|
@@ -140,10 +140,9 @@ class RemoveMatch(Consequence):  # pylint: disable=abstract-method
                    matches.remove(match)
                    ret.append(match)
            return ret
        else:
            if when_response in matches:
                matches.remove(when_response)
            return when_response
        if when_response in matches:
            matches.remove(when_response)
        return when_response


class AppendMatch(Consequence):  # pylint: disable=abstract-method

@@ -164,12 +163,11 @@ class AppendMatch(Consequence):  # pylint: disable=abstract-method
                    matches.append(match)
                    ret.append(match)
            return ret
        else:
            if self.match_name:
                when_response.name = self.match_name
            if when_response not in matches:
                matches.append(when_response)
            return when_response
        if self.match_name:
            when_response.name = self.match_name
        if when_response not in matches:
            matches.append(when_response)
        return when_response


class RenameMatch(Consequence):  # pylint: disable=abstract-method
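With the `else` branches gone, `then()` reads straight through: list responses are processed item by item, and single responses fall through to the same removal/append logic. The standard rebulk rule idiom that feeds these consequences (illustrative, not part of this diff):

    from rebulk.rules import Rule, RemoveMatch


    class RemoveWeakMatches(Rule):
        """Drop every match tagged 'weak' in one consequence pass."""
        consequence = RemoveMatch

        def when(self, matches, context):
            # A list return makes then() iterate; a single match also works.
            return matches.tagged('weak')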
@ -2,11 +2,11 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# pylint: disable=no-self-use, pointless-statement, missing-docstring, no-member, len-as-condition
|
||||
import re
|
||||
|
||||
from functools import partial
|
||||
|
||||
from rebulk.pattern import FunctionalPattern, StringPattern, RePattern
|
||||
from ..rebulk import Rebulk
|
||||
from ..validators import chars_surround
|
||||
from ..rebulk import Rebulk, FunctionalPattern, RePattern, StringPattern
|
||||
|
||||
|
||||
def test_chain_close():
|
||||
|
@ -63,18 +63,61 @@ def test_build_chain():

def test_chain_defaults():
    rebulk = Rebulk()
    rebulk.defaults(validator=lambda x: True, ignore_names=['testIgnore'], children=True)
    rebulk.defaults(validator=lambda x: x.value.startswith('t'), ignore_names=['testIgnore'], children=True)

    rebulk.chain()\
    rebulk.chain() \
        .regex("(?P<test>test)") \
        .regex(" ").repeater("*") \
        .regex("(?P<best>best)") \
        .regex(" ").repeater("*") \
        .regex("(?P<testIgnore>testIgnore)")
    matches = rebulk.matches("test testIgnore")
    matches = rebulk.matches("test best testIgnore")

    assert len(matches) == 1
    assert matches[0].name == "test"


def test_chain_with_validators():
    def chain_validator(match):
        return match.value.startswith('t') and match.value.endswith('t')

    def default_validator(match):
        return match.value.startswith('t') and match.value.endswith('g')

    def custom_validator(match):
        return match.value.startswith('b') and match.value.endswith('t')

    rebulk = Rebulk()
    rebulk.defaults(children=True, validator=default_validator)

    rebulk.chain(validate_all=True, validator={'__parent__': chain_validator}) \
        .regex("(?P<test>testing)", validator=default_validator).repeater("+") \
        .regex(" ").repeater("+") \
        .regex("(?P<best>best)", validator=custom_validator).repeater("+")
    matches = rebulk.matches("some testing best end")

    assert len(matches) == 2
    assert matches[0].name == "test"
    assert matches[1].name == "best"


def test_matches_docs():
    rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE) \
        .defaults(children=True, formatter={'episode': int, 'version': int}) \
        .chain() \
        .regex(r'e(?P<episode>\d{1,4})').repeater(1) \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'[ex-](?P<episode>\d{1,4})').repeater('*') \
        .close()  # .repeater(1) could be omitted as it's the default behavior

    result = rebulk.matches("This is E14v2-15-16-17").to_dict()  # converts matches to dict

    assert 'episode' in result
    assert result['episode'] == [14, 15, 16, 17]
    assert 'version' in result
    assert result['version'] == 2


def test_matches():
    rebulk = Rebulk()

@ -144,8 +187,8 @@ def test_matches():
def test_matches_2():
    rebulk = Rebulk() \
        .regex_defaults(flags=re.IGNORECASE) \
        .chain(children=True, formatter={'episode': int}) \
        .defaults(formatter={'version': int}) \
        .defaults(children=True, formatter={'episode': int, 'version': int}) \
        .chain() \
        .regex(r'e(?P<episode>\d{1,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'[ex-](?P<episode>\d{1,4})').repeater('*') \
@ -173,25 +216,32 @@ def test_matches_2():
def test_matches_3():
    alt_dash = (r'@', r'[\W_]')  # abbreviation

    rebulk = Rebulk()
    match_names = ['season', 'episode']
    other_names = ['screen_size', 'video_codec', 'audio_codec', 'audio_channels', 'container', 'date']

    rebulk.chain(formatter={'season': int, 'episode': int},
                 tags=['SxxExx'],
                 abbreviations=[alt_dash],
                 private_names=['episodeSeparator', 'seasonSeparator'],
                 children=True,
                 private_parent=True,
                 conflict_solver=lambda match, other: match
                 if match.name in ['season', 'episode'] and other.name in
                 ['screen_size', 'video_codec', 'audio_codec',
                  'audio_channels', 'container', 'date']
                 else '__default__') \
    rebulk = Rebulk()
    rebulk.defaults(formatter={'season': int, 'episode': int},
                    tags=['SxxExx'],
                    abbreviations=[alt_dash],
                    private_names=['episodeSeparator', 'seasonSeparator'],
                    children=True,
                    private_parent=True,
                    conflict_solver=lambda match, other: match
                    if match.name in match_names and other.name in other_names
                    else '__default__')

    rebulk.chain() \
        .defaults(children=True, private_parent=True) \
        .regex(r'(?P<season>\d+)@?x@?(?P<episode>\d+)') \
        .regex(r'(?P<episodeSeparator>x|-|\+|&)(?P<episode>\d+)').repeater('*') \
        .close() \
        .chain() \
        .defaults(children=True, private_parent=True) \
        .regex(r'S(?P<season>\d+)@?(?:xE|Ex|E|x)@?(?P<episode>\d+)') \
        .regex(r'(?:(?P<episodeSeparator>xE|Ex|E|x|-|\+|&)(?P<episode>\d+))').repeater('*') \
        .close() \
        .chain() \
        .defaults(children=True, private_parent=True) \
        .regex(r'S(?P<season>\d+)') \
        .regex(r'(?P<seasonSeparator>S|-|\+|&)(?P<season>\d+)').repeater('*')

@ -240,11 +290,11 @@ def test_matches_4():

    rebulk = Rebulk()
    rebulk.regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True,
                    validator={'__parent__': seps_surround}, children=True, private_parent=True)
    rebulk.defaults(validate_all=True, children=True)
    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'], private_parent=True)

    rebulk.chain(formatter={'episode': int, 'version': int}) \
        .defaults(validator=None) \
    rebulk.chain(validator={'__parent__': seps_surround}, formatter={'episode': int, 'version': int}) \
        .defaults(formatter={'episode': int, 'version': int}) \
        .regex(r'e(?P<episode>\d{1,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('*')
@ -262,11 +312,11 @@ def test_matches_5():

    rebulk = Rebulk()
    rebulk.regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True,
                    validator={'__parent__': seps_surround}, children=True, private_parent=True)

    rebulk.chain(formatter={'episode': int, 'version': int}) \
        .defaults(validator=None) \
    rebulk.chain(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True,
                 validator={'__parent__': seps_surround}, children=True, private_parent=True,
                 formatter={'episode': int, 'version': int}) \
        .defaults(children=True, private_parent=True) \
        .regex(r'e(?P<episode>\d{1,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('{2,3}')
@ -288,7 +338,7 @@ def test_matches_6():
                    validator=None, children=True, private_parent=True)

    rebulk.chain(formatter={'episode': int, 'version': int}) \
        .defaults(validator=None) \
        .defaults(children=True, private_parent=True) \
        .regex(r'e(?P<episode>\d{1,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('{2,3}')

@ -2,19 +2,15 @@
# -*- coding: utf-8 -*-
# pylint: disable=no-self-use, pointless-statement, missing-docstring, protected-access, invalid-name, len-as-condition

from .default_rules_module import RuleRemove0
from .. import debug
from ..match import Match
from ..pattern import StringPattern
from ..rebulk import Rebulk
from ..match import Match
from .. import debug
from .default_rules_module import RuleRemove0


class TestDebug(object):

    #request.addfinalizer(disable_debug)

    # request.addfinalizer(disable_debug)

    debug.DEBUG = True
    pattern = StringPattern(1, 3, value="es")
@ -38,43 +34,43 @@ class TestDebug(object):
        debug.DEBUG = False

    def test_pattern(self):
        assert self.pattern.defined_at.lineno == 20
        assert self.pattern.defined_at.lineno > 0
        assert self.pattern.defined_at.name == 'rebulk.test.test_debug'
        assert self.pattern.defined_at.filename.endswith('test_debug.py')

        assert str(self.pattern.defined_at) == 'test_debug.py#L20'
        assert repr(self.pattern) == '<StringPattern@test_debug.py#L20:(1, 3)>'
        assert str(self.pattern.defined_at).startswith('test_debug.py#L')
        assert repr(self.pattern).startswith('<StringPattern@test_debug.py#L')

    def test_match(self):
        assert self.match.defined_at.lineno == 22
        assert self.match.defined_at.lineno > 0
        assert self.match.defined_at.name == 'rebulk.test.test_debug'
        assert self.match.defined_at.filename.endswith('test_debug.py')

        assert str(self.match.defined_at) == 'test_debug.py#L22'
        assert str(self.match.defined_at).startswith('test_debug.py#L')

    def test_rule(self):
        assert self.rule.defined_at.lineno == 23
        assert self.rule.defined_at.lineno > 0
        assert self.rule.defined_at.name == 'rebulk.test.test_debug'
        assert self.rule.defined_at.filename.endswith('test_debug.py')

        assert str(self.rule.defined_at) == 'test_debug.py#L23'
        assert repr(self.rule) == '<RuleRemove0@test_debug.py#L23>'
        assert str(self.rule.defined_at).startswith('test_debug.py#L')
        assert repr(self.rule).startswith('<RuleRemove0@test_debug.py#L')

    def test_rebulk(self):
        """
        This test fails on travis CI, can't find out why there's 1 line offset ...
        """
        assert self.rebulk._patterns[0].defined_at.lineno in [26, 27]
        assert self.rebulk._patterns[0].defined_at.lineno > 0
        assert self.rebulk._patterns[0].defined_at.name == 'rebulk.test.test_debug'
        assert self.rebulk._patterns[0].defined_at.filename.endswith('test_debug.py')

        assert str(self.rebulk._patterns[0].defined_at) in ['test_debug.py#L26', 'test_debug.py#L27']
        assert str(self.rebulk._patterns[0].defined_at).startswith('test_debug.py#L')

        assert self.rebulk._patterns[1].defined_at.lineno in [27, 28]
        assert self.rebulk._patterns[1].defined_at.lineno > 0
        assert self.rebulk._patterns[1].defined_at.name == 'rebulk.test.test_debug'
        assert self.rebulk._patterns[1].defined_at.filename.endswith('test_debug.py')

        assert str(self.rebulk._patterns[1].defined_at) in ['test_debug.py#L27', 'test_debug.py#L28']
        assert str(self.rebulk._patterns[1].defined_at).startswith('test_debug.py#L')

        assert self.matches[0].defined_at == self.rebulk._patterns[0].defined_at
        assert self.matches[1].defined_at == self.rebulk._patterns[1].defined_at

@ -116,6 +116,9 @@ class TestMatchesClass(object):
        assert "tag1" in matches.tags
        assert "tag2" in matches.tags

        assert self.match3.tagged("tag1")
        assert not self.match3.tagged("start")

        tag1 = matches.tagged("tag1")
        assert len(tag1) == 2
        assert tag1[0] == self.match2

@ -3,7 +3,10 @@
"""
Various utilities functions
"""
from collections import MutableSet
try:
    from collections.abc import MutableSet
except ImportError:
    from collections import MutableSet

from types import GeneratorType

@ -62,9 +62,20 @@ def validators(*chained_validators):
    :return:
    :rtype:
    """

    def validator_chain(match):  # pylint:disable=missing-docstring
        for chained_validator in chained_validators:
            if not chained_validator(match):
                return False
        return True

    return validator_chain


def allways_true(match):  # pylint:disable=unused-argument
    """
    A validator which is allways true
    :param match:
    :return:
    """
    return True
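A quick sketch of how validators() composes (not part of the diff; the lambda is a made-up check, chars_surround is the rebulk helper imported in the test diff above):

    from rebulk.validators import chars_surround, validators

    # the returned validator_chain short-circuits on the first failing validator
    check = validators(chars_surround(' '), lambda match: len(match.value) > 2)
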
@ -73,9 +73,9 @@ class Addic7edSubtitle(Subtitle):
        # resolution
        if video.resolution and self.version and video.resolution in self.version.lower():
            matches.add('resolution')
        # format
        if video.format and self.version and video.format.lower() in self.version.lower():
            matches.add('format')
        # source
        if video.source and self.version and video.source.lower() in self.version.lower():
            matches.add('source')
        # other properties
        matches |= guess_matches(video, guessit(self.version), partial=True)

@ -46,13 +46,13 @@ def refine(video, embedded_subtitles=True, **kwargs):

        # video codec
        if video_track.codec_id == 'V_MPEG4/ISO/AVC':
            video.video_codec = 'h264'
            video.video_codec = 'H.264'
            logger.debug('Found video_codec %s', video.video_codec)
        elif video_track.codec_id == 'V_MPEG4/ISO/SP':
            video.video_codec = 'DivX'
            logger.debug('Found video_codec %s', video.video_codec)
        elif video_track.codec_id == 'V_MPEG4/ISO/ASP':
            video.video_codec = 'XviD'
            video.video_codec = 'Xvid'
            logger.debug('Found video_codec %s', video.video_codec)
    else:
        logger.warning('MKV has no video track')
@ -62,7 +62,7 @@ def refine(video, embedded_subtitles=True, **kwargs):
        audio_track = mkv.audio_tracks[0]
        # audio codec
        if audio_track.codec_id == 'A_AC3':
            video.audio_codec = 'AC3'
            video.audio_codec = 'Dolby Digital'
            logger.debug('Found audio_codec %s', video.audio_codec)
        elif audio_track.codec_id == 'A_DTS':
            video.audio_codec = 'DTS'

@ -17,7 +17,7 @@ Available matches:
  * season
  * episode
  * release_group
  * format
  * source
  * audio_codec
  * resolution
  * hearing_impaired
@ -38,11 +38,11 @@ logger = logging.getLogger(__name__)

#: Scores for episodes
episode_scores = {'hash': 359, 'series': 180, 'year': 90, 'season': 30, 'episode': 30, 'release_group': 15,
                  'format': 7, 'audio_codec': 3, 'resolution': 2, 'video_codec': 2, 'hearing_impaired': 1}
                  'source': 7, 'audio_codec': 3, 'resolution': 2, 'video_codec': 2, 'hearing_impaired': 1}

#: Scores for movies
movie_scores = {'hash': 119, 'title': 60, 'year': 30, 'release_group': 15,
                'format': 7, 'audio_codec': 3, 'resolution': 2, 'video_codec': 2, 'hearing_impaired': 1}
                'source': 7, 'audio_codec': 3, 'resolution': 2, 'video_codec': 2, 'hearing_impaired': 1}

#: Equivalent release groups
equivalent_release_groups = ({'LOL', 'DIMENSION'}, {'ASAP', 'IMMERSE', 'FLEET'}, {'AVS', 'SVA'})
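As an illustration of how these weights are consumed (not part of the diff), a subtitle's score is the sum of the weights of its matched properties, now keyed by 'source' instead of 'format':

    score = sum(episode_scores[m] for m in ('series', 'season', 'episode', 'source'))
    # 180 + 30 + 30 + 7 == 247
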
@ -153,30 +153,30 @@ def solve_episode_equations():
    from sympy import Eq, solve, symbols

    hash, series, year, season, episode, release_group = symbols('hash series year season episode release_group')
    format, audio_codec, resolution, video_codec = symbols('format audio_codec resolution video_codec')
    source, audio_codec, resolution, video_codec = symbols('source audio_codec resolution video_codec')
    hearing_impaired = symbols('hearing_impaired')

    equations = [
        # hash is best
        Eq(hash, series + year + season + episode + release_group + format + audio_codec + resolution + video_codec),
        Eq(hash, series + year + season + episode + release_group + source + audio_codec + resolution + video_codec),

        # series counts for the most part in the total score
        Eq(series, year + season + episode + release_group + format + audio_codec + resolution + video_codec + 1),
        Eq(series, year + season + episode + release_group + source + audio_codec + resolution + video_codec + 1),

        # year is the second most important part
        Eq(year, season + episode + release_group + format + audio_codec + resolution + video_codec + 1),
        Eq(year, season + episode + release_group + source + audio_codec + resolution + video_codec + 1),

        # season is important too
        Eq(season, release_group + format + audio_codec + resolution + video_codec + 1),
        Eq(season, release_group + source + audio_codec + resolution + video_codec + 1),

        # episode is equally important to season
        Eq(episode, season),

        # release group is the next most wanted match
        Eq(release_group, format + audio_codec + resolution + video_codec + 1),
        Eq(release_group, source + audio_codec + resolution + video_codec + 1),

        # format counts as much as audio_codec, resolution and video_codec
        Eq(format, audio_codec + resolution + video_codec),
        # source counts as much as audio_codec, resolution and video_codec
        Eq(source, audio_codec + resolution + video_codec),

        # audio_codec is more valuable than video_codec
        Eq(audio_codec, video_codec + 1),
@ -191,7 +191,7 @@ def solve_episode_equations():
        Eq(hearing_impaired, 1),
    ]

    return solve(equations, [hash, series, year, season, episode, release_group, format, audio_codec, resolution,
    return solve(equations, [hash, series, year, season, episode, release_group, source, audio_codec, resolution,
                             hearing_impaired, video_codec])
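A worked check (not part of the diff): the episode weights listed earlier satisfy this system's first equation, hash == series + year + season + episode + release_group + source + audio_codec + resolution + video_codec:

    assert 359 == 180 + 90 + 30 + 30 + 15 + 7 + 3 + 2 + 2
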
@ -199,24 +199,24 @@ def solve_movie_equations():
    from sympy import Eq, solve, symbols

    hash, title, year, release_group = symbols('hash title year release_group')
    format, audio_codec, resolution, video_codec = symbols('format audio_codec resolution video_codec')
    source, audio_codec, resolution, video_codec = symbols('source audio_codec resolution video_codec')
    hearing_impaired = symbols('hearing_impaired')

    equations = [
        # hash is best
        Eq(hash, title + year + release_group + format + audio_codec + resolution + video_codec),
        Eq(hash, title + year + release_group + source + audio_codec + resolution + video_codec),

        # title counts for the most part in the total score
        Eq(title, year + release_group + format + audio_codec + resolution + video_codec + 1),
        Eq(title, year + release_group + source + audio_codec + resolution + video_codec + 1),

        # year is the second most important part
        Eq(year, release_group + format + audio_codec + resolution + video_codec + 1),
        Eq(year, release_group + source + audio_codec + resolution + video_codec + 1),

        # release group is the next most wanted match
        Eq(release_group, format + audio_codec + resolution + video_codec + 1),
        Eq(release_group, source + audio_codec + resolution + video_codec + 1),

        # format counts as much as audio_codec, resolution and video_codec
        Eq(format, audio_codec + resolution + video_codec),
        # source counts as much as audio_codec, resolution and video_codec
        Eq(source, audio_codec + resolution + video_codec),

        # audio_codec is more valuable than video_codec
        Eq(audio_codec, video_codec + 1),
@ -231,5 +231,5 @@ def solve_movie_equations():
        Eq(hearing_impaired, 1),
    ]

    return solve(equations, [hash, title, year, release_group, format, audio_codec, resolution, hearing_impaired,
    return solve(equations, [hash, title, year, release_group, source, audio_codec, resolution, hearing_impaired,
                             video_codec])
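The movie weights pass the analogous check (not part of the diff): hash == title + year + release_group + source + audio_codec + resolution + video_codec:

    assert 119 == 60 + 30 + 15 + 7 + 3 + 2 + 2
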
@ -10,6 +10,7 @@ import pysrt
from .score import get_equivalent_release_groups
from .video import Episode, Movie
from .utils import sanitize, sanitize_release_group
from six import text_type


logger = logging.getLogger(__name__)
@ -71,10 +72,12 @@ class Subtitle(object):
        if not self.content:
            return

        if self.encoding:
            return self.content.decode(self.encoding, errors='replace')
        if not isinstance(self.content, text_type):
            if self.encoding:
                return self.content.decode(self.encoding, errors='replace')
            return self.content.decode(self.guess_encoding(), errors='replace')

        return self.content.decode(self.guess_encoding(), errors='replace')
        return self.content

    def is_valid(self):
        """Check if a :attr:`text` is a valid SubRip format.
@ -238,9 +241,9 @@ def guess_matches(video, guess, partial=False):
    # resolution
    if video.resolution and 'screen_size' in guess and guess['screen_size'] == video.resolution:
        matches.add('resolution')
    # format
    if video.format and 'format' in guess and guess['format'].lower() == video.format.lower():
        matches.add('format')
    # source
    if video.source and 'source' in guess and guess['source'].lower() == video.source.lower():
        matches.add('source')
    # video_codec
    if video.video_codec and 'video_codec' in guess and guess['video_codec'] == video.video_codec:
        matches.add('video_codec')
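A minimal sketch of how guess_matches() is typically fed (not part of the diff; the release name is made up, and the import paths assume stock subliminal rather than the vendored copy):

    from guessit import guessit
    from subliminal.subtitle import guess_matches
    from subliminal.video import Video

    video = Video.fromname('Show.S01E01.720p.HDTV.x264-GRP.mkv')
    # with GuessIt 3 the guess carries 'source' (e.g. 'HDTV') instead of 'format'
    matches = guess_matches(video, guessit('Show.S01E01.720p.HDTV.x264-GRP.mkv'))
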
@ -25,7 +25,7 @@ class Video(object):
    Represent a video, existing or not.

    :param str name: name or path of the video.
    :param str format: format of the video (HDTV, WEB-DL, BluRay, ...).
    :param str source: source of the video (HDTV, Web, Blu-ray, ...).
    :param str release_group: release group of the video.
    :param str resolution: resolution of the video stream (480p, 720p, 1080p or 1080i).
    :param str video_codec: codec of the video stream.
@ -36,13 +36,13 @@ class Video(object):
    :param set subtitle_languages: existing subtitle languages.

    """
    def __init__(self, name, format=None, release_group=None, resolution=None, video_codec=None, audio_codec=None,
    def __init__(self, name, source=None, release_group=None, resolution=None, video_codec=None, audio_codec=None,
                 imdb_id=None, hashes=None, size=None, subtitle_languages=None):
        #: Name or path of the video
        self.name = name

        #: Format of the video (HDTV, WEB-DL, BluRay, ...)
        self.format = format
        #: Source of the video (HDTV, Web, Blu-ray, ...)
        self.source = source

        #: Release group of the video
        self.release_group = release_group
@ -177,7 +177,7 @@ class Episode(Video):
        episode = min(episode_guess) if episode_guess and isinstance(episode_guess, list) else episode_guess

        return cls(name, guess['title'], guess.get('season', 1), episode, title=guess.get('episode_title'),
                   year=guess.get('year'), format=guess.get('format'), original_series='year' not in guess,
                   year=guess.get('year'), source=guess.get('source'), original_series='year' not in guess,
                   release_group=guess.get('release_group'), resolution=guess.get('screen_size'),
                   video_codec=guess.get('video_codec'), audio_codec=guess.get('audio_codec'))
@ -225,7 +225,7 @@ class Movie(Video):
        if 'alternative_title' in guess:
            alternative_titles.append(u"%s %s" % (guess['title'], guess['alternative_title']))

        return cls(name, guess['title'], format=guess.get('format'), release_group=guess.get('release_group'),
        return cls(name, guess['title'], source=guess.get('source'), release_group=guess.get('release_group'),
                   resolution=guess.get('screen_size'), video_codec=guess.get('video_codec'),
                   audio_codec=guess.get('audio_codec'), year=guess.get('year'), alternative_titles=alternative_titles)
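For reference (not part of the diff), GuessIt 3 renames the 'format' property to 'source', which is what fromguess() now reads; the release name below is made up:

    from guessit import guessit

    guess = guessit('Show.S01E01.720p.HDTV.x264-GRP.mkv')
    assert guess['source'] == 'HDTV'  # GuessIt 2.x exposed this value under 'format'
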
@ -44,11 +44,11 @@ class Addic7edSubtitle(_Addic7edSubtitle):
        if not subliminal.score.episode_scores.get("addic7ed_boost"):
            return matches

        # if the release group matches, the format is most likely correct, as well
        # if the release group matches, the source is most likely correct, as well
        if "release_group" in matches:
            matches.add("format")
            matches.add("source")

        if {"series", "season", "episode", "year"}.issubset(matches) and "format" in matches:
        if {"series", "season", "episode", "year"}.issubset(matches) and "source" in matches:
            matches.add("addic7ed_boost")
            logger.info("Boosting Addic7ed subtitle by %s" % subliminal.score.episode_scores.get("addic7ed_boost"))
        return matches

@ -39,7 +39,7 @@ class ArgenteamSubtitle(Subtitle):
        self.asked_for_release_group = asked_for_release_group
        self.asked_for_episode = asked_for_episode
        self.matches = None
        self.format = source
        self.source = source
        self.video_codec = video_codec
        self.tvdb_id = tvdb_id
        self.imdb_id = "tt" + imdb_id if imdb_id else None
@ -55,7 +55,7 @@ class ArgenteamSubtitle(Subtitle):
            return self._release_info

        combine = []
        for attr in ("format", "version"):
        for attr in ("source", "version"):
            value = getattr(self, attr)
            if value:
                combine.append(value)
@ -115,22 +115,22 @@ class ArgenteamSubtitle(Subtitle):
        if any(r in sanitize_release_group(self.release) for r in get_equivalent_release_groups(rg)):
            matches.add('release_group')

        # blatantly assume we've got a matching format if the release group matches
        # blatantly assume we've got a matching source if the release group matches
        # fixme: smart?
        #matches.add('format')
        #matches.add('source')

        # resolution
        if video.resolution and self.version and str(video.resolution) in self.version.lower():
            matches.add('resolution')
        # format
        if video.format and self.format:
            formats = [video.format]
            if video.format == "WEB-DL":
        # source
        if video.source and self.source:
            formats = [video.source]
            if video.source == "Web":
                formats.append("WEB")

            for fmt in formats:
                if fmt.lower() in self.format.lower():
                    matches.add('format')
                if fmt.lower() in self.source.lower():
                    matches.add('source')
                    break

        matches |= guess_matches(video, guessit(self.release_info), partial=True)

@ -65,25 +65,25 @@ class BSPlayerSubtitle(Subtitle):
        if video.resolution and video.resolution.lower() in subtitle_filename:
            matches.add('resolution')

        # format
        # source
        formats = []
        if video.format:
            formats = [video.format.lower()]
            if formats[0] == "web-dl":
        if video.source:
            formats = [video.source.lower()]
            if formats[0] == "web":
                formats.append("webdl")
                formats.append("webrip")
                formats.append("web ")
        for frmt in formats:
            if frmt.lower() in subtitle_filename:
                matches.add('format')
                matches.add('source')
                break

        # video_codec
        if video.video_codec:
            video_codecs = [video.video_codec.lower()]
            if video_codecs[0] == "h264":
            if video_codecs[0] == "H.264":
                formats.append("x264")
            elif video_codecs[0] == "h265":
            elif video_codecs[0] == "H.265":
                formats.append("x265")
        for vc in formats:
            if vc.lower() in subtitle_filename:

@ -112,9 +112,9 @@ class HosszupuskaSubtitle(Subtitle):
        # resolution
        if video.resolution and self.version and video.resolution in self.version.lower():
            matches.add('resolution')
        # format
        if video.format and self.version and video.format.lower() in self.version.lower():
            matches.add('format')
        # source
        if video.source and self.version and video.source.lower() in self.version.lower():
            matches.add('source')
        # other properties
        matches |= guess_matches(video, guessit(self.release_info))

@ -118,25 +118,25 @@ class LegendasdivxSubtitle(Subtitle):
        if video.resolution and video.resolution.lower() in description:
            matches.update(['resolution'])

        # format
        # source
        formats = []
        if video.format:
            formats = [video.format.lower()]
            if formats[0] == "web-dl":
        if video.source:
            formats = [video.source.lower()]
            if formats[0] == "web":
                formats.append("webdl")
                formats.append("webrip")
                formats.append("web")
        for frmt in formats:
            if frmt in description:
                matches.update(['format'])
                matches.update(['source'])
                break

        # video_codec
        if video.video_codec:
            video_codecs = [video.video_codec.lower()]
            if video_codecs[0] == "h264":
            if video_codecs[0] == "H.264":
                video_codecs.append("x264")
            elif video_codecs[0] == "h265":
            elif video_codecs[0] == "H.265":
                video_codecs.append("x265")
        for vc in video_codecs:
            if vc in description:

@ -83,7 +83,7 @@ class ProviderSubtitleArchiveMixin(object):

        # consider subtitle valid if:
        # - episode and season match
        # - format matches (if it was matched before)
        # - source matches (if it was matched before)
        # - release group matches (and we asked for one and it was matched, or it was not matched)
        # - not asked for forced and "forced" not in filename
        is_episode = subtitle.asked_for_episode
@ -103,27 +103,27 @@ class ProviderSubtitleArchiveMixin(object):
                        or (subtitle.is_pack and subtitle.asked_for_episode in episodes)
                ) and guess.get("season") == subtitle.season):

                format_matches = True
                wanted_format_but_not_found = False
                source_matches = True
                wanted_source_but_not_found = False

                if "format" in subtitle.matches:
                    format_matches = False
                if "source" in subtitle.matches:
                    source_matches = False
                    if isinstance(subtitle.releases, list):
                        releases = ",".join(subtitle.releases).lower()
                    else:
                        releases = subtitle.releases.lower()

                    if "format" not in guess:
                        wanted_format_but_not_found = True
                    if "source" not in guess:
                        wanted_source_but_not_found = True

                    else:
                        formats = guess["format"]
                        formats = guess["source"]
                        if not isinstance(formats, list):
                            formats = [formats]

                        for f in formats:
                            format_matches = f.lower() in releases
                            if format_matches:
                            source_matches = f.lower() in releases
                            if source_matches:
                                break

                release_group_matches = True
@ -139,11 +139,11 @@ class ProviderSubtitleArchiveMixin(object):
                    if asked_for_rlsgrp in sub_name_lower:
                        release_group_matches = True

                if release_group_matches and format_matches:
                if release_group_matches and source_matches:
                    matching_sub = sub_name
                    break

                elif release_group_matches and wanted_format_but_not_found:
                elif release_group_matches and wanted_source_but_not_found:
                    subs_unsure.append(sub_name)
                else:
                    subs_fallback.append(sub_name)
Some files were not shown because too many files have changed in this diff.