Upgraded GuessIt to 3.0.1

This commit is contained in:
Louis Vézina 2020-05-20 11:29:39 -04:00
parent 5b44007bbb
commit 376e13d7f1
110 changed files with 10933 additions and 3549 deletions

View file

@ -918,8 +918,8 @@ def refine_from_db(path, video):
if int(data['year']) > 0: video.year = int(data['year'])
video.series_tvdb_id = int(data['tvdbId'])
video.alternative_series = ast.literal_eval(data['alternateTitles'])
if not video.format:
video.format = str(data['format'])
if not video.source:
video.source = str(data['format'])
if not video.resolution:
video.resolution = str(data['resolution'])
if not video.video_codec:
@ -937,8 +937,8 @@ def refine_from_db(path, video):
if int(data['year']) > 0: video.year = int(data['year'])
if data['imdbId']: video.imdb_id = data['imdbId']
video.alternative_titles = ast.literal_eval(data['alternativeTitles'])
if not video.format:
if data['format']: video.format = data['format']
if not video.source:
if data['format']: video.source = data['format']
if not video.resolution:
if data['resolution']: video.resolution = data['resolution']
if not video.video_codec:

View file

@ -3,7 +3,12 @@
"""
Extracts as much information as possible from a video file.
"""
from . import monkeypatch as _monkeypatch
from .api import guessit, GuessItApi
from .options import ConfigurationException
from .rules.common.quantity import Size
from .__version__ import __version__
_monkeypatch.monkeypatch_rebulk()

View file

@ -17,7 +17,13 @@ from rebulk.__version__ import __version__ as __rebulk_version__
from guessit import api
from guessit.__version__ import __version__
from guessit.jsonutils import GuessitEncoder
from guessit.options import argument_parser, parse_options, load_config
from guessit.options import argument_parser, parse_options, load_config, merge_options
try:
from collections import OrderedDict
except ImportError: # pragma: no-cover
from ordereddict import OrderedDict # pylint:disable=import-error
def guess_filename(filename, options):
@ -45,7 +51,7 @@ def guess_filename(filename, options):
import yaml
from guessit import yamlutils
ystr = yaml.dump({filename: dict(guess)}, Dumper=yamlutils.CustomDumper, default_flow_style=False,
ystr = yaml.dump({filename: OrderedDict(guess)}, Dumper=yamlutils.CustomDumper, default_flow_style=False,
allow_unicode=True)
i = 0
for yline in ystr.splitlines():
@ -91,9 +97,9 @@ def display_properties(options):
print(4 * ' ' + '[!] %s' % (property_value,))
def main(args=None): # pylint:disable=too-many-branches
def fix_argv_encoding():
"""
Main function for entry point
Fix encoding of sys.argv on windows Python 2
"""
if six.PY2 and os.name == 'nt': # pragma: no cover
# see http://bugs.python.org/issue2128
@ -102,11 +108,21 @@ def main(args=None): # pylint:disable=too-many-branches
for i, j in enumerate(sys.argv):
sys.argv[i] = j.decode(locale.getpreferredencoding())
def main(args=None): # pylint:disable=too-many-branches
"""
Main function for entry point
"""
fix_argv_encoding()
if args is None: # pragma: no cover
options = parse_options()
else:
options = parse_options(args)
options = load_config(options)
config = load_config(options)
options = merge_options(config, options)
if options.get('verbose'):
logging.basicConfig(stream=sys.stdout, format='%(message)s')
logging.getLogger().setLevel(logging.DEBUG)
@ -126,7 +142,7 @@ def main(args=None): # pylint:disable=too-many-branches
if options.get('yaml'):
try:
import yaml # pylint:disable=unused-variable
import yaml # pylint:disable=unused-variable,unused-import
except ImportError: # pragma: no cover
del options['yaml']
print('PyYAML is not installed. \'--yaml\' option will be ignored ...', file=sys.stderr)

View file

@ -4,4 +4,4 @@
Version module
"""
# pragma: no cover
__version__ = '2.1.4'
__version__ = '3.1.1'

View file

@ -3,26 +3,28 @@
"""
API functions that can be used by external software
"""
try:
from collections import OrderedDict
except ImportError: # pragma: no-cover
from ordereddict import OrderedDict # pylint:disable=import-error
import os
import traceback
import six
from rebulk.introspector import introspect
from .rules import rebulk_builder
from .options import parse_options
from .__version__ import __version__
from .options import parse_options, load_config, merge_options
from .rules import rebulk_builder
class GuessitException(Exception):
"""
Exception raised when guessit fails to perform a guess because of an internal error.
"""
def __init__(self, string, options):
super(GuessitException, self).__init__("An internal error has occured in guessit.\n"
"===================== Guessit Exception Report =====================\n"
@ -41,12 +43,27 @@ class GuessitException(Exception):
self.options = options
def configure(options=None, rules_builder=rebulk_builder, force=False):
    """
    Load configuration files and initialize rebulk rules if required.

    Thin module-level wrapper delegating to the shared `default_api` instance.

    :param options: options used to load the configuration
    :type options: dict
    :param rules_builder: callable building the Rebulk rules from a config dict
    :type rules_builder:
    :param force: when True, reload configuration / rebuild rules unconditionally
    :type force: bool
    :return:
    """
    default_api.configure(options, rules_builder=rules_builder, force=force)
def guessit(string, options=None):
"""
Retrieves all matches from string as a dict
:param string: the filename or release name
:type string: str
:param options: the filename or release name
:param options:
:type options: str|dict
:return:
:rtype:
@ -58,65 +75,138 @@ def properties(options=None):
"""
Retrieves all properties with possible values that can be guessed
:param options:
:type options:
:type options: str|dict
:return:
:rtype:
"""
return default_api.properties(options)
def suggested_expected(titles, options=None):
    """
    Return a list of suggested titles to be used as `expected_title` based on the list of titles.

    Thin module-level wrapper delegating to the shared `default_api` instance.

    :param titles: the filenames or release names to analyze
    :type titles: list|set|dict
    :param options: guessing options
    :type options: str|dict
    :return: candidate titles for the `expected_title` option
    :rtype: list of str
    """
    return default_api.suggested_expected(titles, options)
class GuessItApi(object):
"""
An api class that can be configured with custom Rebulk configuration.
"""
def __init__(self, rebulk):
"""
:param rebulk: Rebulk instance to use.
:type rebulk: Rebulk
:return:
:rtype:
"""
self.rebulk = rebulk
def __init__(self):
    """Default constructor."""
    # Rebulk rules object; built lazily by configure().
    self.rebulk = None
    # Last merged configuration dict produced by configure().
    self.config = None
    # Options in effect when configuration was last loaded (acts as a cache key).
    self.load_config_options = None
    # advanced_config used to build self.rebulk (detects when a rebuild is needed).
    self.advanced_config = None
@classmethod
def _fix_encoding(cls, value):
    """
    Recursively normalize string types in an options structure.

    On Python 2, text is encoded to UTF-8 bytes; on Python 3, bytes are
    decoded to str (ASCII). Lists and dicts are processed recursively
    (dict keys included). Any other value is returned unchanged.

    :param value: value to normalize (list, dict, text, bytes or anything else)
    :return: the value with normalized string types
    """
    if isinstance(value, list):
        return [cls._fix_encoding(item) for item in value]
    if isinstance(value, dict):
        return {cls._fix_encoding(k): cls._fix_encoding(v) for k, v in value.items()}
    if six.PY2 and isinstance(value, six.text_type):
        return value.encode('utf-8')
    if six.PY3 and isinstance(value, six.binary_type):
        return value.decode('ascii')
    return value
def guessit(self, string, options=None):
@classmethod
def _has_same_properties(cls, dic1, dic2, values):
    """Return True when both dicts hold equal values for every key in `values`."""
    return all(dic1.get(key) == dic2.get(key) for key in values)
def configure(self, options=None, rules_builder=rebulk_builder, force=False, sanitize_options=True):
    """
    Load configuration files and initialize rebulk rules if required.

    :param options: options used to load the configuration
    :type options: str|dict
    :param rules_builder: callable building the Rebulk rules from an advanced_config dict
    :type rules_builder:
    :param force: when True, reload configuration and rebuild rules unconditionally
    :type force: bool
    :param sanitize_options: when False, options are assumed already parsed and encoding-fixed
    :type sanitize_options: bool
    :return:
    :rtype: dict
    """
    if sanitize_options:
        options = parse_options(options, True)
        options = self._fix_encoding(options)
    # Reload configuration files only when forced, when never loaded before,
    # or when the config-related options changed since the previous load.
    if self.config is None or self.load_config_options is None or force or \
            not self._has_same_properties(self.load_config_options,
                                          options,
                                          ['config', 'no_user_config', 'no_default_config']):
        config = load_config(options)
        config = self._fix_encoding(config)
        self.load_config_options = options
    else:
        config = self.config
    # Merge file-provided advanced_config with any override passed in options.
    advanced_config = merge_options(config.get('advanced_config'), options.get('advanced_config'))
    should_build_rebulk = force or not self.rebulk or not self.advanced_config or \
        self.advanced_config != advanced_config
    if should_build_rebulk:
        # Rebuild rules only when the effective advanced_config changed
        # (or on force / first use).
        self.advanced_config = advanced_config
        self.rebulk = rules_builder(advanced_config)
    self.config = config
    return self.config
def guessit(self, string, options=None): # pylint: disable=too-many-branches
"""
Retrieves all matches from string as a dict
:param string: the filename or release name
:type string: str
:param options: the filename or release name
:type string: str|Path
:param options:
:type options: str|dict
:return:
:rtype:
"""
try:
from pathlib import Path
if isinstance(string, Path):
try:
# Handle path-like object
string = os.fspath(string)
except AttributeError:
string = str(string)
except ImportError:
pass
try:
options = parse_options(options, True)
options = self._fix_encoding(options)
config = self.configure(options, sanitize_options=False)
options = merge_options(config, options)
result_decode = False
result_encode = False
fixed_options = {}
for (key, value) in options.items():
key = GuessItApi._fix_option_encoding(key)
value = GuessItApi._fix_option_encoding(value)
fixed_options[key] = value
options = fixed_options
if six.PY2:
if isinstance(string, six.text_type):
string = string.encode("utf-8")
result_decode = True
elif isinstance(string, six.binary_type):
string = six.binary_type(string)
if six.PY3:
if isinstance(string, six.binary_type):
string = string.decode('ascii')
result_encode = True
elif isinstance(string, six.text_type):
string = six.text_type(string)
if six.PY2 and isinstance(string, six.text_type):
string = string.encode("utf-8")
result_decode = True
if six.PY3 and isinstance(string, six.binary_type):
string = string.decode('ascii')
result_encode = True
matches = self.rebulk.matches(string, options)
if result_decode:
for match in matches:
@ -139,6 +229,10 @@ class GuessItApi(object):
:return:
:rtype:
"""
options = parse_options(options, True)
options = self._fix_encoding(options)
config = self.configure(options, sanitize_options=False)
options = merge_options(config, options)
unordered = introspect(self.rebulk, options).properties
ordered = OrderedDict()
for k in sorted(unordered.keys(), key=six.text_type):
@ -147,5 +241,23 @@ class GuessItApi(object):
ordered = self.rebulk.customize_properties(ordered)
return ordered
def suggested_expected(self, titles, options=None):
    """
    Return a list of suggested titles to be used as `expected_title` based on the list of titles.

    :param titles: the filenames or release names to analyze
    :type titles: list|set|dict
    :param options: guessing options
    :type options: str|dict
    :return: titles that were not cleanly parsed (candidates for `expected_title`)
    :rtype: list of str
    """
    suggested = []
    for title in titles:
        guess = self.guessit(title, options)
        # A guess with exactly two properties including 'title' is treated as
        # a clean parse; anything else makes the title a suggestion candidate.
        if len(guess) != 2 or 'title' not in guess:
            suggested.append(title)
    return suggested


# Shared default API instance used by the module-level helper functions.
default_api = GuessItApi()

View file

@ -4,7 +4,7 @@
Backports
"""
# pragma: no-cover
# pylint: disabled
# pylint: skip-file
def cmp_to_key(mycmp):
"""functools.cmp_to_key backport"""

View file

@ -1,5 +1,586 @@
{
"expected_title": [
"OSS 117"
]
}
"OSS 117",
"This is Us"
],
"allowed_countries": [
"au",
"gb",
"us"
],
"allowed_languages": [
"ca",
"cs",
"de",
"en",
"es",
"fr",
"he",
"hi",
"hu",
"it",
"ja",
"ko",
"mul",
"nl",
"no",
"pl",
"pt",
"ro",
"ru",
"sv",
"te",
"uk",
"und"
],
"advanced_config": {
"common_words": [
"ca",
"cat",
"de",
"he",
"it",
"no",
"por",
"rum",
"se",
"st",
"sub"
],
"groups": {
"starting": "([{",
"ending": ")]}"
},
"audio_codec": {
"audio_channels": {
"1.0": [
"1ch",
"mono"
],
"2.0": [
"2ch",
"stereo",
"re:(2[\\W_]0(?:ch)?)(?=[^\\d]|$)"
],
"5.1": [
"5ch",
"6ch",
"re:(5[\\W_][01](?:ch)?)(?=[^\\d]|$)",
"re:(6[\\W_]0(?:ch)?)(?=[^\\d]|$)"
],
"7.1": [
"7ch",
"8ch",
"re:(7[\\W_][01](?:ch)?)(?=[^\\d]|$)"
]
}
},
"container": {
"subtitles": [
"srt",
"idx",
"sub",
"ssa",
"ass"
],
"info": [
"nfo"
],
"videos": [
"3g2",
"3gp",
"3gp2",
"asf",
"avi",
"divx",
"flv",
"iso",
"m4v",
"mk2",
"mk3d",
"mka",
"mkv",
"mov",
"mp4",
"mp4a",
"mpeg",
"mpg",
"ogg",
"ogm",
"ogv",
"qt",
"ra",
"ram",
"rm",
"ts",
"vob",
"wav",
"webm",
"wma",
"wmv"
],
"torrent": [
"torrent"
],
"nzb": [
"nzb"
]
},
"country": {
"synonyms": {
"ES": [
"españa"
],
"GB": [
"UK"
],
"BR": [
"brazilian",
"bra"
],
"CA": [
"québec",
"quebec",
"qc"
],
"MX": [
"Latinoamérica",
"latin america"
]
}
},
"episodes": {
"season_max_range": 100,
"episode_max_range": 100,
"max_range_gap": 1,
"season_markers": [
"s"
],
"season_ep_markers": [
"x"
],
"disc_markers": [
"d"
],
"episode_markers": [
"xe",
"ex",
"ep",
"e",
"x"
],
"range_separators": [
"-",
"~",
"to",
"a"
],
"discrete_separators": [
"+",
"&",
"and",
"et"
],
"season_words": [
"season",
"saison",
"seizoen",
"seasons",
"saisons",
"tem",
"temp",
"temporada",
"temporadas",
"stagione"
],
"episode_words": [
"episode",
"episodes",
"eps",
"ep",
"episodio",
"episodios",
"capitulo",
"capitulos"
],
"of_words": [
"of",
"sur"
],
"all_words": [
"All"
]
},
"language": {
"synonyms": {
"ell": [
"gr",
"greek"
],
"spa": [
"esp",
"español",
"espanol"
],
"fra": [
"français",
"vf",
"vff",
"vfi",
"vfq"
],
"swe": [
"se"
],
"por_BR": [
"po",
"pb",
"pob",
"ptbr",
"br",
"brazilian"
],
"deu_CH": [
"swissgerman",
"swiss german"
],
"nld_BE": [
"flemish"
],
"cat": [
"català",
"castellano",
"espanol castellano",
"español castellano"
],
"ces": [
"cz"
],
"ukr": [
"ua"
],
"zho": [
"cn"
],
"jpn": [
"jp"
],
"hrv": [
"scr"
],
"mul": [
"multi",
"dl"
]
},
"subtitle_affixes": [
"sub",
"subs",
"esub",
"esubs",
"subbed",
"custom subbed",
"custom subs",
"custom sub",
"customsubbed",
"customsubs",
"customsub",
"soft subtitles",
"soft subs"
],
"subtitle_prefixes": [
"st",
"vost",
"subforced",
"fansub",
"hardsub",
"legenda",
"legendas",
"legendado",
"subtitulado",
"soft",
"subtitles"
],
"subtitle_suffixes": [
"subforced",
"fansub",
"hardsub"
],
"language_affixes": [
"dublado",
"dubbed",
"dub"
],
"language_prefixes": [
"true"
],
"language_suffixes": [
"audio"
],
"weak_affixes": [
"v",
"audio",
"true"
]
},
"part": {
"prefixes": [
"pt",
"part"
]
},
"release_group": {
"forbidden_names": [
"bonus",
"by",
"for",
"par",
"pour",
"rip"
],
"ignored_seps": "[]{}()"
},
"screen_size": {
"frame_rates": [
"23.976",
"24",
"25",
"29.970",
"30",
"48",
"50",
"60",
"120"
],
"min_ar": 1.333,
"max_ar": 1.898,
"interlaced": [
"360",
"480",
"576",
"900",
"1080"
],
"progressive": [
"360",
"480",
"540",
"576",
"900",
"1080",
"368",
"720",
"1440",
"2160",
"4320"
]
},
"website": {
"safe_tlds": [
"com",
"net",
"org"
],
"safe_subdomains": [
"www"
],
"safe_prefixes": [
"co",
"com",
"net",
"org"
],
"prefixes": [
"from"
]
},
"streaming_service": {
"A&E": [
"AE",
"A&E"
],
"ABC": "AMBC",
"ABC Australia": "AUBC",
"Al Jazeera English": "AJAZ",
"AMC": "AMC",
"Amazon Prime": [
"AMZN",
"Amazon",
"re:Amazon-?Prime"
],
"Adult Swim": [
"AS",
"re:Adult-?Swim"
],
"America's Test Kitchen": "ATK",
"Animal Planet": "ANPL",
"AnimeLab": "ANLB",
"AOL": "AOL",
"ARD": "ARD",
"BBC iPlayer": [
"iP",
"re:BBC-?iPlayer"
],
"BravoTV": "BRAV",
"Canal+": "CNLP",
"Cartoon Network": "CN",
"CBC": "CBC",
"CBS": "CBS",
"CNBC": "CNBC",
"Comedy Central": [
"CC",
"re:Comedy-?Central"
],
"Channel 4": "4OD",
"CHRGD": "CHGD",
"Cinemax": "CMAX",
"Country Music Television": "CMT",
"Comedians in Cars Getting Coffee": "CCGC",
"Crunchy Roll": [
"CR",
"re:Crunchy-?Roll"
],
"Crackle": "CRKL",
"CSpan": "CSPN",
"CTV": "CTV",
"CuriosityStream": "CUR",
"CWSeed": "CWS",
"Daisuki": "DSKI",
"DC Universe": "DCU",
"Deadhouse Films": "DHF",
"DramaFever": [
"DF",
"DramaFever"
],
"Digiturk Diledigin Yerde": "DDY",
"Discovery": [
"DISC",
"Discovery"
],
"Disney": [
"DSNY",
"Disney"
],
"DIY Network": "DIY",
"Doc Club": "DOCC",
"DPlay": "DPLY",
"E!": "ETV",
"ePix": "EPIX",
"El Trece": "ETTV",
"ESPN": "ESPN",
"Esquire": "ESQ",
"Family": "FAM",
"Family Jr": "FJR",
"Food Network": "FOOD",
"Fox": "FOX",
"Freeform": "FREE",
"FYI Network": "FYI",
"Global": "GLBL",
"GloboSat Play": "GLOB",
"Hallmark": "HLMK",
"HBO Go": [
"HBO",
"re:HBO-?Go"
],
"HGTV": "HGTV",
"History": [
"HIST",
"History"
],
"Hulu": "HULU",
"Investigation Discovery": "ID",
"IFC": "IFC",
"iTunes": "iTunes",
"ITV": "ITV",
"Knowledge Network": "KNOW",
"Lifetime": "LIFE",
"Motor Trend OnDemand": "MTOD",
"MBC": [
"MBC",
"MBCVOD"
],
"MSNBC": "MNBC",
"MTV": "MTV",
"National Geographic": [
"NATG",
"re:National-?Geographic"
],
"NBA TV": [
"NBA",
"re:NBA-?TV"
],
"NBC": "NBC",
"Netflix": [
"NF",
"Netflix"
],
"NFL": "NFL",
"NFL Now": "NFLN",
"NHL GameCenter": "GC",
"Nickelodeon": [
"NICK",
"Nickelodeon"
],
"Norsk Rikskringkasting": "NRK",
"OnDemandKorea": [
"ODK",
"OnDemandKorea"
],
"PBS": "PBS",
"PBS Kids": "PBSK",
"Playstation Network": "PSN",
"Pluzz": "PLUZ",
"RTE One": "RTE",
"SBS (AU)": "SBS",
"SeeSo": [
"SESO",
"SeeSo"
],
"Shomi": "SHMI",
"Spike": "SPIK",
"Spike TV": [
"SPKE",
"re:Spike-?TV"
],
"Sportsnet": "SNET",
"Sprout": "SPRT",
"Stan": "STAN",
"Starz": "STZ",
"Sveriges Television": "SVT",
"SwearNet": "SWER",
"Syfy": "SYFY",
"TBS": "TBS",
"TFou": "TFOU",
"The CW": [
"CW",
"re:The-?CW"
],
"TLC": "TLC",
"TubiTV": "TUBI",
"TV3 Ireland": "TV3",
"TV4 Sweeden": "TV4",
"TVING": "TVING",
"TV Land": [
"TVL",
"re:TV-?Land"
],
"UFC": "UFC",
"UKTV": "UKTV",
"Univision": "UNIV",
"USA Network": "USAN",
"Velocity": "VLCT",
"VH1": "VH1",
"Viceland": "VICE",
"Viki": "VIKI",
"Vimeo": "VMEO",
"VRV": "VRV",
"W Network": "WNET",
"WatchMe": "WME",
"WWE Network": "WWEN",
"Xbox Video": "XBOX",
"Yahoo": "YHOO",
"YouTube Red": "RED",
"ZDF": "ZDF"
}
}
}

View file

@ -4,14 +4,10 @@
JSON Utils
"""
import json
try:
from collections import OrderedDict
except ImportError: # pragma: no-cover
from ordereddict import OrderedDict # pylint:disable=import-error
from six import text_type
from rebulk.match import Match
class GuessitEncoder(json.JSONEncoder):
"""
JSON Encoder for guessit response
@ -19,14 +15,8 @@ class GuessitEncoder(json.JSONEncoder):
def default(self, o): # pylint:disable=method-hidden
if isinstance(o, Match):
ret = OrderedDict()
ret['value'] = o.value
if o.raw:
ret['raw'] = o.raw
ret['start'] = o.start
ret['end'] = o.end
return ret
elif hasattr(o, 'name'): # Babelfish languages/countries long name
return str(o.name)
else: # pragma: no cover
return str(o)
return o.advanced
if hasattr(o, 'name'): # Babelfish languages/countries long name
return text_type(o.name)
# pragma: no cover
return text_type(o)

View file

@ -0,0 +1,34 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Monkeypatch initialisation functions
"""
try:
from collections import OrderedDict
except ImportError: # pragma: no-cover
from ordereddict import OrderedDict # pylint:disable=import-error
from rebulk.match import Match
def monkeypatch_rebulk():
    """Monkeypatch rebulk classes."""

    @property
    def match_advanced(self):
        """
        Build an advanced (ordered) dict view of a match.

        :param self:
        :return:
        """
        details = OrderedDict()
        details['value'] = self.value
        if self.raw:
            details['raw'] = self.raw
        details['start'] = self.start
        details['end'] = self.end
        return details

    Match.advanced = match_advanced

View file

@ -3,10 +3,12 @@
"""
Options
"""
import copy
import json
import os
import pkgutil
import shlex
from argparse import ArgumentParser
import six
@ -42,6 +44,10 @@ def build_argument_parser():
help='Expected title to parse (can be used multiple times)')
naming_opts.add_argument('-G', '--expected-group', action='append', dest='expected_group', default=None,
help='Expected release group (can be used multiple times)')
naming_opts.add_argument('--includes', action='append', dest='includes', default=None,
help='List of properties to be detected')
naming_opts.add_argument('--excludes', action='append', dest='excludes', default=None,
help='List of properties to be ignored')
input_opts = opts.add_argument_group("Input")
input_opts.add_argument('-f', '--input-file', dest='input_file', default=None,
@ -65,14 +71,20 @@ def build_argument_parser():
conf_opts = opts.add_argument_group("Configuration")
conf_opts.add_argument('-c', '--config', dest='config', action='append', default=None,
help='Filepath to the configuration file. Configuration contains the same options as '
'those command line options, but option names have "-" characters replaced with "_". '
'If not defined, guessit tries to read a configuration default configuration file at '
'~/.guessit/options.(json|yml|yaml) and ~/.config/guessit/options.(json|yml|yaml). '
'Set to "false" to disable default configuration file loading.')
conf_opts.add_argument('--no-embedded-config', dest='no_embedded_config', action='store_true',
help='Filepath to configuration file. Configuration file contains the same '
'options as those from command line options, but option names have "-" characters '
'replaced with "_". This configuration will be merged with default and user '
'configuration files.')
conf_opts.add_argument('--no-user-config', dest='no_user_config', action='store_true',
default=None,
help='Disable default configuration.')
help='Disable user configuration. If not defined, guessit tries to read configuration files '
'at ~/.guessit/options.(json|yml|yaml) and ~/.config/guessit/options.(json|yml|yaml)')
conf_opts.add_argument('--no-default-config', dest='no_default_config', action='store_true',
default=None,
help='Disable default configuration. This should be done only if you are providing a full '
'configuration through user configuration or --config option. If no "advanced_config" '
'is provided by another configuration file, it will still be loaded from default '
'configuration.')
information_opts = opts.add_argument_group("Information")
information_opts.add_argument('-p', '--properties', dest='properties', action='store_true', default=None,
@ -92,7 +104,7 @@ def parse_options(options=None, api=False):
:param options:
:type options:
:param api
:type boolean
:type api: boolean
:return:
:rtype:
"""
@ -116,93 +128,113 @@ class ConfigurationException(Exception):
"""
Exception related to configuration file.
"""
pass
pass # pylint:disable=unnecessary-pass
def load_config(options):
"""
Load configuration from configuration file, if defined.
Load options from configuration files, if defined and present.
:param options:
:type options:
:return:
:rtype:
"""
config_files_enabled = True
custom_config_files = None
if options.get('config') is not None:
custom_config_files = options.get('config')
if not custom_config_files \
or not custom_config_files[0] \
or custom_config_files[0].lower() in ['0', 'no', 'false', 'disabled']:
config_files_enabled = False
configurations = []
if config_files_enabled:
if not options.get('no_default_config'):
default_options_data = pkgutil.get_data('guessit', 'config/options.json').decode('utf-8')
default_options = json.loads(default_options_data)
configurations.append(default_options)
config_files = []
if not options.get('no_user_config'):
home_directory = os.path.expanduser("~")
cwd = os.getcwd()
yaml_supported = False
try:
import yaml # pylint: disable=unused-variable
import yaml # pylint:disable=unused-variable,unused-import
yaml_supported = True
except ImportError:
pass
config_file_locations = get_config_file_locations(home_directory, cwd, yaml_supported)
config_file_locations = get_options_file_locations(home_directory, cwd, yaml_supported)
config_files = [f for f in config_file_locations if os.path.exists(f)]
if custom_config_files:
config_files = config_files + custom_config_files
custom_config_files = options.get('config')
if custom_config_files:
config_files = config_files + custom_config_files
for config_file in config_files:
config_file_options = load_config_file(config_file)
if config_file_options:
configurations.append(config_file_options)
if not options.get('no_embedded_config'):
embedded_options_data = pkgutil.get_data('guessit', 'config/options.json').decode("utf-8")
embedded_options = json.loads(embedded_options_data)
configurations.append(embedded_options)
for config_file in config_files:
config_file_options = load_config_file(config_file)
if config_file_options:
configurations.append(config_file_options)
config = {}
if configurations:
configurations.append(options)
return merge_configurations(*configurations)
config = merge_options(*configurations)
return options
if 'advanced_config' not in config:
# Guessit doesn't work without advanced_config, so we use default if no configuration files provides it.
default_options_data = pkgutil.get_data('guessit', 'config/options.json').decode('utf-8')
default_options = json.loads(default_options_data)
config['advanced_config'] = default_options['advanced_config']
return config
def merge_configurations(*configurations):
def merge_options(*options):
"""
Merge configurations into a single options dict.
:param configurations:
:type configurations:
Merge options into a single options dict.
:param options:
:type options:
:return:
:rtype:
"""
merged = {}
if options:
if options[0]:
merged.update(copy.deepcopy(options[0]))
for options in configurations:
pristine = options.get('pristine')
for options in options[1:]:
if options:
pristine = options.get('pristine')
if pristine:
if pristine is True:
merged = {}
else:
for to_reset in pristine:
if to_reset in merged:
del merged[to_reset]
if pristine is True:
merged = {}
elif pristine:
for to_reset in pristine:
if to_reset in merged:
del merged[to_reset]
for (option, value) in options.items():
if value is not None and option != 'pristine':
if option in merged.keys() and isinstance(merged[option], list):
merged[option].extend(value)
elif isinstance(value, list):
merged[option] = list(value)
else:
merged[option] = value
for (option, value) in options.items():
merge_option_value(option, value, merged)
return merged
def merge_option_value(option, value, merged):
    """
    Merge a single option value into the `merged` options dict (in place).

    Existing lists are extended with items not already present (order
    preserved), existing dicts are merged recursively via `merge_options`,
    and anything else overwrites the previous value. The `pristine`
    pseudo-option and None values are ignored.

    :param option: option name
    :param value: option value to merge
    :param merged: target dict, mutated in place
    :return:
    """
    if value is None or option == 'pristine':
        return
    # Single lookup instead of the `option in merged.keys()` membership test.
    current = merged.get(option)
    if isinstance(current, list):
        for item in value:
            if item not in current:
                current.append(item)
    elif isinstance(current, dict):
        merged[option] = merge_options(current, value)
    elif isinstance(value, list):
        merged[option] = list(value)
    else:
        merged[option] = value
def load_config_file(filepath):
"""
Load a configuration as an options dict.
@ -220,17 +252,24 @@ def load_config_file(filepath):
try:
import yaml
with open(filepath) as config_file_data:
return yaml.load(config_file_data)
return yaml.load(config_file_data, yaml.SafeLoader)
except ImportError: # pragma: no cover
raise ConfigurationException('Configuration file extension is not supported. '
'PyYAML should be installed to support "%s" file' % (
filepath,))
try:
# Try to load input as JSON
return json.loads(filepath)
except: # pylint: disable=bare-except
pass
raise ConfigurationException('Configuration file extension is not supported for "%s" file.' % (filepath,))
def get_config_file_locations(homedir, cwd, yaml_supported=False):
def get_options_file_locations(homedir, cwd, yaml_supported=False):
"""
Get all possible locations for configuration file.
Get all possible locations for options file.
:param homedir: user home directory
:type homedir: basestring
:param cwd: current working directory

View file

@ -10,7 +10,7 @@ from .markers.groups import groups
from .properties.episodes import episodes
from .properties.container import container
from .properties.format import format_
from .properties.source import source
from .properties.video_codec import video_codec
from .properties.audio_codec import audio_codec
from .properties.screen_size import screen_size
@ -24,6 +24,7 @@ from .properties.release_group import release_group
from .properties.streaming_service import streaming_service
from .properties.other import other
from .properties.size import size
from .properties.bit_rate import bit_rate
from .properties.edition import edition
from .properties.cds import cds
from .properties.bonus import bonus
@ -36,44 +37,50 @@ from .properties.type import type_
from .processors import processors
def rebulk_builder():
def rebulk_builder(config):
"""
Default builder for main Rebulk object used by api.
:return: Main Rebulk object
:rtype: Rebulk
"""
def _config(name):
return config.get(name, {})
rebulk = Rebulk()
rebulk.rebulk(path())
rebulk.rebulk(groups())
common_words = frozenset(_config('common_words'))
rebulk.rebulk(episodes())
rebulk.rebulk(container())
rebulk.rebulk(format_())
rebulk.rebulk(video_codec())
rebulk.rebulk(audio_codec())
rebulk.rebulk(screen_size())
rebulk.rebulk(website())
rebulk.rebulk(date())
rebulk.rebulk(title())
rebulk.rebulk(episode_title())
rebulk.rebulk(language())
rebulk.rebulk(country())
rebulk.rebulk(release_group())
rebulk.rebulk(streaming_service())
rebulk.rebulk(other())
rebulk.rebulk(size())
rebulk.rebulk(edition())
rebulk.rebulk(cds())
rebulk.rebulk(bonus())
rebulk.rebulk(film())
rebulk.rebulk(part())
rebulk.rebulk(crc())
rebulk.rebulk(path(_config('path')))
rebulk.rebulk(groups(_config('groups')))
rebulk.rebulk(processors())
rebulk.rebulk(episodes(_config('episodes')))
rebulk.rebulk(container(_config('container')))
rebulk.rebulk(source(_config('source')))
rebulk.rebulk(video_codec(_config('video_codec')))
rebulk.rebulk(audio_codec(_config('audio_codec')))
rebulk.rebulk(screen_size(_config('screen_size')))
rebulk.rebulk(website(_config('website')))
rebulk.rebulk(date(_config('date')))
rebulk.rebulk(title(_config('title')))
rebulk.rebulk(episode_title(_config('episode_title')))
rebulk.rebulk(language(_config('language'), common_words))
rebulk.rebulk(country(_config('country'), common_words))
rebulk.rebulk(release_group(_config('release_group')))
rebulk.rebulk(streaming_service(_config('streaming_service')))
rebulk.rebulk(other(_config('other')))
rebulk.rebulk(size(_config('size')))
rebulk.rebulk(bit_rate(_config('bit_rate')))
rebulk.rebulk(edition(_config('edition')))
rebulk.rebulk(cds(_config('cds')))
rebulk.rebulk(bonus(_config('bonus')))
rebulk.rebulk(film(_config('film')))
rebulk.rebulk(part(_config('part')))
rebulk.rebulk(crc(_config('crc')))
rebulk.rebulk(mimetype())
rebulk.rebulk(type_())
rebulk.rebulk(processors(_config('processors')))
rebulk.rebulk(mimetype(_config('mimetype')))
rebulk.rebulk(type_(_config('type')))
def customize_properties(properties):
"""

View file

@ -13,9 +13,12 @@ def marker_comparator_predicate(match):
"""
Match predicate used in comparator
"""
return not match.private and \
match.name not in ['proper_count', 'title', 'episode_title', 'alternative_title'] and \
not (match.name == 'container' and 'extension' in match.tags)
return (
not match.private
and match.name not in ('proper_count', 'title')
and not (match.name == 'container' and 'extension' in match.tags)
and not (match.name == 'other' and match.value == 'Rip')
)
def marker_weight(matches, marker, predicate):
@ -50,9 +53,8 @@ def marker_comparator(matches, markers, predicate):
matches_count = marker_weight(matches, marker2, predicate) - marker_weight(matches, marker1, predicate)
if matches_count:
return matches_count
len_diff = len(marker2) - len(marker1)
if len_diff:
return len_diff
# give preference to rightmost path
return markers.index(marker2) - markers.index(marker1)
return comparator

View file

@ -42,7 +42,7 @@ def _is_int(string):
return False
def _guess_day_first_parameter(groups):
def _guess_day_first_parameter(groups): # pylint:disable=inconsistent-return-statements
"""
If day_first is not defined, use some heuristic to fix it.
It helps to solve issues with python dateutils 2.5.3 parser changes.
@ -57,17 +57,17 @@ def _guess_day_first_parameter(groups):
if _is_int(groups[0]) and valid_year(int(groups[0][:4])):
return False
# If match ends with a long year, the day_first is forced to true.
elif _is_int(groups[-1]) and valid_year(int(groups[-1][-4:])):
if _is_int(groups[-1]) and valid_year(int(groups[-1][-4:])):
return True
# If match starts with a short year, then day_first is force to false.
elif _is_int(groups[0]) and int(groups[0][:2]) > 31:
if _is_int(groups[0]) and int(groups[0][:2]) > 31:
return False
# If match ends with a short year, then day_first is force to true.
elif _is_int(groups[-1]) and int(groups[-1][-2:]) > 31:
if _is_int(groups[-1]) and int(groups[-1][-2:]) > 31:
return True
def search_date(string, year_first=None, day_first=None):
def search_date(string, year_first=None, day_first=None): # pylint:disable=inconsistent-return-statements
"""Looks for date patterns, and if found return the date and group span.
Assumes there are sentinels at the beginning and end of the string that

View file

@ -25,7 +25,7 @@ def _potential_before(i, input_string):
:return:
:rtype: bool
"""
return i - 2 >= 0 and input_string[i] == input_string[i - 2] and input_string[i - 1] not in seps
return i - 1 >= 0 and input_string[i] in seps and input_string[i - 2] in seps and input_string[i - 1] not in seps
def _potential_after(i, input_string):

View file

@ -0,0 +1,27 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Pattern utility functions
"""
def is_disabled(context, name):
    """Return True when the pattern *name* is disabled by *context*.

    The context object might define an inclusion list (includes) or an
    exclusion list (excludes). A pattern is considered disabled if it's found
    in the exclusion list, or if a non-empty inclusion list exists that does
    not contain it.

    :param context: options mapping (may be None or empty)
    :param name: pattern name to check
    :return: True if the pattern is disabled, False otherwise
    :rtype: bool
    """
    if not context:
        return False

    excludes = context.get('excludes')
    if excludes and name in excludes:
        return True

    includes = context.get('includes')
    # bool() keeps the return type consistent: without it the expression
    # leaks the falsy `includes` object itself (None or []) to callers.
    return bool(includes) and name not in includes

View file

@ -0,0 +1,106 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Quantities: Size
"""
import re
from abc import abstractmethod
import six
from ..common import seps
class Quantity(object):
    """
    Base class for a measured quantity: a numeric magnitude plus a unit label.
    """

    parser_re = re.compile(r'(?P<magnitude>\d+(?:[.]\d+)?)(?P<units>[^\d]+)?')

    def __init__(self, magnitude, units):
        self.magnitude = magnitude
        self.units = units

    @classmethod
    @abstractmethod
    def parse_units(cls, value):
        """
        Normalize a raw unit string into the canonical notation for this class.
        """
        raise NotImplementedError

    @classmethod
    def fromstring(cls, string):
        """
        Build a quantity instance by parsing the given string.

        :param string: text such as '1.1GB' or '320Kbps'
        :return: a new instance of this class
        """
        parsed = cls.parser_re.match(string).groupdict()
        raw_magnitude = parsed['magnitude']
        try:
            amount = int(raw_magnitude)
        except ValueError:
            # The regex only allows an optional '.<digits>' suffix, so any
            # int() failure means a float literal.
            amount = float(raw_magnitude)
        return cls(amount, cls.parse_units(parsed['units']))

    def __hash__(self):
        return hash(str(self))

    def __eq__(self, other):
        # Allow direct comparison against the string form, e.g. '1.1GB'.
        if isinstance(other, six.string_types):
            return str(self) == other
        if isinstance(other, self.__class__):
            return self.magnitude == other.magnitude and self.units == other.units
        return NotImplemented

    def __ne__(self, other):
        equal = self == other
        return not equal

    def __repr__(self):
        return '<{0} [{1}]>'.format(self.__class__.__name__, self)

    def __str__(self):
        return '{0}{1}'.format(self.magnitude, self.units)
class Size(Quantity):
    """
    A file size quantity, e.g. 1.1GB or 300MB.
    """

    @classmethod
    def parse_units(cls, value):
        # Drop surrounding separator characters, then canonicalize the unit
        # to upper case ('gb' -> 'GB').
        trimmed = value.strip(seps)
        return trimmed.upper()
class BitRate(Quantity):
    """
    Represent bit rate.

    e.g.: 320Kbps, 1.5Mbps
    """

    @classmethod
    def parse_units(cls, value):
        # Normalize case first: 'kbps' -> 'Kbps', 'MBPS' -> 'Mbps'.
        value = value.strip(seps).capitalize()
        # Replace 'bits' before 'bit' so that 'Kbits' becomes 'Kbps'
        # instead of 'Kbpss'.
        for token in ('bits', 'bit'):
            value = value.replace(token, 'bps')
        return value
class FrameRate(Quantity):
    """
    Represent frame rate.

    e.g.: 24fps, 60fps
    """

    @classmethod
    def parse_units(cls, value):
        # The unit is always normalized to 'fps', whatever spelling was
        # matched; the raw value is intentionally ignored.
        return 'fps'

View file

@ -28,7 +28,7 @@ def int_coercable(string):
return False
def compose(*validators):
def and_(*validators):
"""
Compose validators functions
:param validators:
@ -49,3 +49,26 @@ def compose(*validators):
return False
return True
return composed
def or_(*validators):
    """
    Compose validator functions into a single validator that succeeds when
    at least one of the given validators succeeds.

    With no validators at all, the composed validator always returns False.

    :param validators: validator callables taking the candidate value
    :return: the composed validator function
    :rtype: callable
    """
    def composed(string):
        """
        Return True if any of the composed validators accepts the value.
        """
        return any(validator(string) for validator in validators)
    return composed

View file

@ -32,48 +32,3 @@ def iter_words(string):
i += 1
if inside_word:
yield _Word(span=(last_sep_index+1, i), value=string[last_sep_index+1:i])
# list of common words which could be interpreted as properties, but which
# are far too common to be able to say they represent a property in the
# middle of a string (where they most likely carry their commmon meaning)
# NOTE: callers test membership with `word.lower()`, so every entry must be
# lower-case. A few entries ('ne', 'lt', 'gt') appear more than once across
# sections; duplicates are harmless in a frozenset.
COMMON_WORDS = frozenset([
    # english words
    'is', 'it', 'am', 'mad', 'men', 'man', 'run', 'sin', 'st', 'to',
    'no', 'non', 'war', 'min', 'new', 'car', 'day', 'bad', 'bat', 'fan',
    'fry', 'cop', 'zen', 'gay', 'fat', 'one', 'cherokee', 'got', 'an', 'as',
    'cat', 'her', 'be', 'hat', 'sun', 'may', 'my', 'mr', 'rum', 'pi', 'bb',
    'bt', 'tv', 'aw', 'by', 'md', 'mp', 'cd', 'lt', 'gt', 'in', 'ad', 'ice',
    'ay', 'at', 'star', 'so', 'he', 'do', 'ax', 'mx',
    # french words
    'bas', 'de', 'le', 'son', 'ne', 'ca', 'ce', 'et', 'que',
    'mal', 'est', 'vol', 'or', 'mon', 'se', 'je', 'tu', 'me',
    'ne', 'ma', 'va', 'au', 'lu',
    # japanese words,
    'wa', 'ga', 'ao',
    # spanish words
    'la', 'el', 'del', 'por', 'mar', 'al',
    # italian words
    'un',
    # other
    'ind', 'arw', 'ts', 'ii', 'bin', 'chan', 'ss', 'san', 'oss', 'iii',
    'vi', 'ben', 'da', 'lt', 'ch', 'sr', 'ps', 'cx', 'vo',
    # new from babelfish
    'mkv', 'avi', 'dmd', 'the', 'dis', 'cut', 'stv', 'des', 'dia', 'and',
    'cab', 'sub', 'mia', 'rim', 'las', 'une', 'par', 'srt', 'ano', 'toy',
    'job', 'gag', 'reel', 'www', 'for', 'ayu', 'csi', 'ren', 'moi', 'sur',
    'fer', 'fun', 'two', 'big', 'psy', 'air',
    # movie title
    'brazil', 'jordan',
    # release groups
    'bs',  # Bosnian
    'kz',
    # countries
    'gt', 'lt', 'im',
    # part/pt
    'pt',
    # screener
    'scr',
    # quality
    'sd', 'hr'
])

View file

@ -6,17 +6,20 @@ Groups markers (...), [...] and {...}
from rebulk import Rebulk
def groups():
def groups(config):
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk()
rebulk.defaults(name="group", marker=True)
starting = '([{'
ending = ')]}'
starting = config['starting']
ending = config['ending']
def mark_groups(input_string):
"""

View file

@ -8,9 +8,12 @@ from rebulk import Rebulk
from rebulk.utils import find_all
def path():
def path(config): # pylint:disable=unused-argument
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
@ -22,6 +25,7 @@ def path():
Functional pattern to mark path elements.
:param input_string:
:param context:
:return:
"""
ret = []

View file

@ -0,0 +1,20 @@
"""
Match processors
"""
from guessit.rules.common import seps
def strip(match, chars=seps):
    """
    Strip the given characters from both ends of a match, in place.

    Advances ``match.start`` past leading characters found in ``chars`` and
    pulls ``match.end`` back past trailing ones.

    :param match: rebulk match to trim (mutated in place)
    :param chars: characters to strip (defaults to the common separators)
    :return: False when the trimmed match evaluates falsy — presumably
        signalling that the match should be discarded (verify against the
        rebulk processor contract); otherwise None.
    """
    # NOTE(review): assumes the match contains at least one character not in
    # `chars`; a match made entirely of separators would let the loops walk
    # past the match boundaries before the emptiness check — TODO confirm
    # with callers.
    while match.input_string[match.start] in chars:
        match.start += 1
    while match.input_string[match.end - 1] in chars:
        match.end -= 1
    if not match:
        return False

View file

@ -36,6 +36,7 @@ class EnlargeGroupMatches(CustomRule):
if starting or ending:
return starting, ending
return False
def then(self, matches, when_response, context):
starting, ending = when_response
@ -193,6 +194,23 @@ class SeasonYear(Rule):
return ret
class YearSeason(Rule):
    """
    If a year is found, no season is found, and an episode is found, create
    a season match from each year match.
    """
    # Runs during post-processing, after regular season/episode detection.
    priority = POST_PROCESS
    consequence = AppendMatch

    def when(self, matches, context):
        ret = []
        # Only act when the release has episode markers but no explicit
        # season: the year then doubles as the season number.
        if not matches.named('season') and matches.named('episode'):
            for year in matches.named('year'):
                # Copy the year match so the original 'year' guess is kept.
                season = copy.copy(year)
                season.name = 'season'
                ret.append(season)
        return ret
class Processors(CustomRule):
"""
Empty rule for ordering post_processing properly.
@ -226,13 +244,16 @@ class StripSeparators(CustomRule):
match.raw_end -= 1
def processors():
def processors(config): # pylint:disable=unused-argument
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
return Rebulk().rules(EnlargeGroupMatches, EquivalentHoles,
RemoveLessSpecificSeasonEpisode('season'),
RemoveLessSpecificSeasonEpisode('episode'),
RemoveAmbiguous, SeasonYear, Processors, StripSeparators)
RemoveAmbiguous, SeasonYear, YearSeason, Processors, StripSeparators)

View file

@ -3,22 +3,28 @@
"""
audio_codec, audio_profile and audio_channels property
"""
from rebulk import Rebulk, Rule, RemoveMatch
from rebulk.remodule import re
from rebulk import Rebulk, Rule, RemoveMatch
from ..common import dash
from ..common.pattern import is_disabled
from ..common.validators import seps_before, seps_after
audio_properties = ['audio_codec', 'audio_profile', 'audio_channels']
def audio_codec():
def audio_codec(config): # pylint:disable=unused-argument
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
rebulk = Rebulk()\
.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])\
.string_defaults(ignore_case=True)
def audio_codec_priority(match1, match2):
"""
@ -36,37 +42,53 @@ def audio_codec():
return match1
return '__default__'
rebulk.defaults(name="audio_codec", conflict_solver=audio_codec_priority)
rebulk.defaults(name='audio_codec',
conflict_solver=audio_codec_priority,
disabled=lambda context: is_disabled(context, 'audio_codec'))
rebulk.regex("MP3", "LAME", r"LAME(?:\d)+-?(?:\d)+", value="MP3")
rebulk.regex('Dolby', 'DolbyDigital', 'Dolby-Digital', 'DD', 'AC3D?', value='AC3')
rebulk.regex("DolbyAtmos", "Dolby-Atmos", "Atmos", value="DolbyAtmos")
rebulk.string("MP2", value="MP2")
rebulk.regex('Dolby', 'DolbyDigital', 'Dolby-Digital', 'DD', 'AC3D?', value='Dolby Digital')
rebulk.regex('Dolby-?Atmos', 'Atmos', value='Dolby Atmos')
rebulk.string("AAC", value="AAC")
rebulk.string('EAC3', 'DDP', 'DD+', value="EAC3")
rebulk.string('EAC3', 'DDP', 'DD+', value='Dolby Digital Plus')
rebulk.string("Flac", value="FLAC")
rebulk.string("DTS", value="DTS")
rebulk.regex("True-?HD", value="TrueHD")
rebulk.regex('DTS-?HD', 'DTS(?=-?MA)', value='DTS-HD',
conflict_solver=lambda match, other: other if other.name == 'audio_codec' else '__default__')
rebulk.regex('True-?HD', value='Dolby TrueHD')
rebulk.string('Opus', value='Opus')
rebulk.string('Vorbis', value='Vorbis')
rebulk.string('PCM', value='PCM')
rebulk.string('LPCM', value='LPCM')
rebulk.defaults(name="audio_profile")
rebulk.string("HD", value="HD", tags="DTS")
rebulk.regex("HD-?MA", value="HDMA", tags="DTS")
rebulk.string("HE", value="HE", tags="AAC")
rebulk.string("LC", value="LC", tags="AAC")
rebulk.string("HQ", value="HQ", tags="AC3")
rebulk.defaults(clear=True,
name='audio_profile',
disabled=lambda context: is_disabled(context, 'audio_profile'))
rebulk.string('MA', value='Master Audio', tags=['audio_profile.rule', 'DTS-HD'])
rebulk.string('HR', 'HRA', value='High Resolution Audio', tags=['audio_profile.rule', 'DTS-HD'])
rebulk.string('ES', value='Extended Surround', tags=['audio_profile.rule', 'DTS'])
rebulk.string('HE', value='High Efficiency', tags=['audio_profile.rule', 'AAC'])
rebulk.string('LC', value='Low Complexity', tags=['audio_profile.rule', 'AAC'])
rebulk.string('HQ', value='High Quality', tags=['audio_profile.rule', 'Dolby Digital'])
rebulk.string('EX', value='EX', tags=['audio_profile.rule', 'Dolby Digital'])
rebulk.defaults(name="audio_channels")
rebulk.regex(r'(7[\W_][01](?:ch)?)(?:[^\d]|$)', value='7.1', children=True)
rebulk.regex(r'(5[\W_][01](?:ch)?)(?:[^\d]|$)', value='5.1', children=True)
rebulk.regex(r'(2[\W_]0(?:ch)?)(?:[^\d]|$)', value='2.0', children=True)
rebulk.defaults(clear=True,
name="audio_channels",
disabled=lambda context: is_disabled(context, 'audio_channels'))
rebulk.regex('7[01]', value='7.1', validator=seps_after, tags='weak-audio_channels')
rebulk.regex('5[01]', value='5.1', validator=seps_after, tags='weak-audio_channels')
rebulk.string('20', value='2.0', validator=seps_after, tags='weak-audio_channels')
rebulk.string('7ch', '8ch', value='7.1')
rebulk.string('5ch', '6ch', value='5.1')
rebulk.string('2ch', 'stereo', value='2.0')
rebulk.string('1ch', 'mono', value='1.0')
rebulk.rules(DtsRule, AacRule, Ac3Rule, AudioValidatorRule, HqConflictRule, AudioChannelsValidatorRule)
for value, items in config.get('audio_channels').items():
for item in items:
if item.startswith('re:'):
rebulk.regex(item[3:], value=value, children=True)
else:
rebulk.string(item, value=value)
rebulk.rules(DtsHDRule, DtsRule, AacRule, DolbyDigitalRule, AudioValidatorRule, HqConflictRule,
AudioChannelsValidatorRule)
return rebulk
@ -111,25 +133,49 @@ class AudioProfileRule(Rule):
super(AudioProfileRule, self).__init__()
self.codec = codec
def enabled(self, context):
return not is_disabled(context, 'audio_profile')
def when(self, matches, context):
profile_list = matches.named('audio_profile', lambda match: self.codec in match.tags)
profile_list = matches.named('audio_profile',
lambda match: 'audio_profile.rule' in match.tags and
self.codec in match.tags)
ret = []
for profile in profile_list:
codec = matches.previous(profile, lambda match: match.name == 'audio_codec' and match.value == self.codec)
codec = matches.at_span(profile.span,
lambda match: match.name == 'audio_codec' and
match.value == self.codec, 0)
if not codec:
codec = matches.next(profile, lambda match: match.name == 'audio_codec' and match.value == self.codec)
codec = matches.previous(profile,
lambda match: match.name == 'audio_codec' and
match.value == self.codec)
if not codec:
codec = matches.next(profile,
lambda match: match.name == 'audio_codec' and
match.value == self.codec)
if not codec:
ret.append(profile)
if codec:
ret.extend(matches.conflicting(profile))
return ret
class DtsHDRule(AudioProfileRule):
    """
    Rule to validate DTS-HD profile
    """

    def __init__(self):
        # Restrict the shared AudioProfileRule validation to the 'DTS-HD'
        # audio_codec value.
        super(DtsHDRule, self).__init__('DTS-HD')
class DtsRule(AudioProfileRule):
"""
Rule to validate DTS profile
"""
def __init__(self):
super(DtsRule, self).__init__("DTS")
super(DtsRule, self).__init__('DTS')
class AacRule(AudioProfileRule):
@ -138,16 +184,16 @@ class AacRule(AudioProfileRule):
"""
def __init__(self):
super(AacRule, self).__init__("AAC")
super(AacRule, self).__init__('AAC')
class Ac3Rule(AudioProfileRule):
class DolbyDigitalRule(AudioProfileRule):
"""
Rule to validate AC3 profile
Rule to validate Dolby Digital profile
"""
def __init__(self):
super(Ac3Rule, self).__init__("AC3")
super(DolbyDigitalRule, self).__init__('Dolby Digital')
class HqConflictRule(Rule):
@ -155,16 +201,16 @@ class HqConflictRule(Rule):
Solve conflict between HQ from other property and from audio_profile.
"""
dependency = [DtsRule, AacRule, Ac3Rule]
dependency = [DtsHDRule, DtsRule, AacRule, DolbyDigitalRule]
consequence = RemoveMatch
def when(self, matches, context):
hq_audio = matches.named('audio_profile', lambda match: match.value == 'HQ')
hq_audio_spans = [match.span for match in hq_audio]
hq_other = matches.named('other', lambda match: match.span in hq_audio_spans)
def enabled(self, context):
return not is_disabled(context, 'audio_profile')
if hq_other:
return hq_other
def when(self, matches, context):
hq_audio = matches.named('audio_profile', lambda m: m.value == 'High Quality')
hq_audio_spans = [match.span for match in hq_audio]
return matches.named('other', lambda m: m.span in hq_audio_spans)
class AudioChannelsValidatorRule(Rule):
@ -174,6 +220,9 @@ class AudioChannelsValidatorRule(Rule):
priority = 128
consequence = RemoveMatch
def enabled(self, context):
return not is_disabled(context, 'audio_channels')
def when(self, matches, context):
ret = []

View file

@ -0,0 +1,74 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
video_bit_rate and audio_bit_rate properties
"""
import re
from rebulk import Rebulk
from rebulk.rules import Rule, RemoveMatch, RenameMatch
from ..common import dash, seps
from ..common.pattern import is_disabled
from ..common.quantity import BitRate
from ..common.validators import seps_surround
def bit_rate(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object matching bit rates (e.g. 320Kbps, 1.5Mbps).

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def fully_disabled(context):
        # The whole property group is skipped only when both variants are off.
        return is_disabled(context, 'audio_bit_rate') and is_disabled(context, 'video_bit_rate')

    def solve_channels_conflict(match, other):
        # A bit rate wins over an audio_channels guess unless that guess is a
        # strong (non weak-tagged) one.
        if other.name == 'audio_channels' and 'weak-audio_channels' not in other.tags:
            return match
        return other

    rebulk = Rebulk(disabled=fully_disabled).regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk.defaults(name='audio_bit_rate', validator=seps_surround)
    rebulk.regex(r'\d+-?[kmg]b(ps|its?)', r'\d+\.\d+-?[kmg]b(ps|its?)',
                 conflict_solver=solve_channels_conflict,
                 formatter=BitRate.fromstring, tags=['release-group-prefix'])
    rebulk.rules(BitRateTypeRule)
    return rebulk
class BitRateTypeRule(Rule):
    """
    Convert audio bit rate guess into video bit rate.

    A bit rate that directly follows a source/screen_size/video_codec match
    is renamed to video_bit_rate, unless it directly precedes an audio_codec
    match with an audio-typical magnitude. Matches for disabled properties
    are removed instead of renamed.
    """
    # RenameMatch consumes the first returned list, RemoveMatch the second.
    consequence = [RenameMatch('video_bit_rate'), RemoveMatch]

    def when(self, matches, context):
        to_rename = []
        to_remove = []

        if is_disabled(context, 'audio_bit_rate'):
            # audio_bit_rate is disabled: drop every guess with that name.
            to_remove.extend(matches.named('audio_bit_rate'))
        else:
            video_bit_rate_disabled = is_disabled(context, 'video_bit_rate')
            for match in matches.named('audio_bit_rate'):
                # A video-related match directly before (with no meaningful
                # hole in between) suggests this is actually a video bit rate.
                previous = matches.previous(match, index=0,
                                            predicate=lambda m: m.name in ('source', 'screen_size', 'video_codec'))
                if previous and not matches.holes(previous.end, match.start, predicate=lambda m: m.value.strip(seps)):
                    after = matches.next(match, index=0, predicate=lambda m: m.name == 'audio_codec')
                    if after and not matches.holes(match.end, after.start, predicate=lambda m: m.value.strip(seps)):
                        # Directly followed by an audio codec: keep it as an
                        # audio bit rate when the magnitude is audio-typical
                        # (Kbps, or Mbps below 10).
                        bitrate = match.value
                        if bitrate.units == 'Kbps' or (bitrate.units == 'Mbps' and bitrate.magnitude < 10):
                            continue

                    if video_bit_rate_disabled:
                        to_remove.append(match)
                    else:
                        to_rename.append(match)

        if to_rename or to_remove:
            return to_rename, to_remove
        return False

View file

@ -9,21 +9,27 @@ from rebulk import Rebulk, AppendMatch, Rule
from .title import TitleFromPosition
from ..common.formatters import cleanup
from ..common.pattern import is_disabled
from ..common.validators import seps_surround
def bonus():
def bonus(config): # pylint:disable=unused-argument
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE)
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'bonus'))
rebulk = rebulk.regex_defaults(flags=re.IGNORECASE)
rebulk.regex(r'x(\d+)', name='bonus', private_parent=True, children=True, formatter=int,
validator={'__parent__': lambda match: seps_surround},
validator={'__parent__': seps_surround},
validate_all=True,
conflict_solver=lambda match, conflicting: match
if conflicting.name in ['video_codec', 'episode'] and 'bonus-conflict' not in conflicting.tags
if conflicting.name in ('video_codec', 'episode') and 'weak-episode' not in conflicting.tags
else '__default__')
rebulk.rules(BonusTitleRule)
@ -40,7 +46,7 @@ class BonusTitleRule(Rule):
properties = {'bonus_title': [None]}
def when(self, matches, context):
def when(self, matches, context): # pylint:disable=inconsistent-return-statements
bonus_number = matches.named('bonus', lambda match: not match.private, index=0)
if bonus_number:
filepath = matches.markers.at_match(bonus_number, lambda marker: marker.name == 'path', 0)

View file

@ -6,16 +6,22 @@ cd and cd_count properties
from rebulk.remodule import re
from rebulk import Rebulk
from ..common import dash
from ..common.pattern import is_disabled
def cds():
def cds(config): # pylint:disable=unused-argument
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'cd'))
rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
rebulk.regex(r'cd-?(?P<cd>\d+)(?:-?of-?(?P<cd_count>\d+))?',
validator={'cd': lambda match: 0 < match.value < 100,

View file

@ -8,33 +8,35 @@ from rebulk.remodule import re
from rebulk import Rebulk
from ..common import seps
from ..common.pattern import is_disabled
from ..common.validators import seps_surround
from ...reutils import build_or_pattern
def container():
def container(config):
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'container'))
rebulk = rebulk.regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
rebulk.defaults(name='container',
formatter=lambda value: value.strip(seps),
tags=['extension'],
conflict_solver=lambda match, other: other
if other.name in ['format', 'video_codec'] or
if other.name in ('source', 'video_codec') or
other.name == 'container' and 'extension' not in other.tags
else '__default__')
subtitles = ['srt', 'idx', 'sub', 'ssa', 'ass']
info = ['nfo']
videos = ['3g2', '3gp', '3gp2', 'asf', 'avi', 'divx', 'flv', 'm4v', 'mk2',
'mka', 'mkv', 'mov', 'mp4', 'mp4a', 'mpeg', 'mpg', 'ogg', 'ogm',
'ogv', 'qt', 'ra', 'ram', 'rm', 'ts', 'wav', 'webm', 'wma', 'wmv',
'iso', 'vob']
torrent = ['torrent']
nzb = ['nzb']
subtitles = config['subtitles']
info = config['info']
videos = config['videos']
torrent = config['torrent']
nzb = config['nzb']
rebulk.regex(r'\.'+build_or_pattern(subtitles)+'$', exts=subtitles, tags=['extension', 'subtitle'])
rebulk.regex(r'\.'+build_or_pattern(info)+'$', exts=info, tags=['extension', 'info'])
@ -42,15 +44,16 @@ def container():
rebulk.regex(r'\.'+build_or_pattern(torrent)+'$', exts=torrent, tags=['extension', 'torrent'])
rebulk.regex(r'\.'+build_or_pattern(nzb)+'$', exts=nzb, tags=['extension', 'nzb'])
rebulk.defaults(name='container',
rebulk.defaults(clear=True,
name='container',
validator=seps_surround,
formatter=lambda s: s.lower(),
conflict_solver=lambda match, other: match
if other.name in ['format',
'video_codec'] or other.name == 'container' and 'extension' in other.tags
if other.name in ('source',
'video_codec') or other.name == 'container' and 'extension' in other.tags
else '__default__')
rebulk.string(*[sub for sub in subtitles if sub not in ['sub']], tags=['subtitle'])
rebulk.string(*[sub for sub in subtitles if sub not in ('sub', 'ass')], tags=['subtitle'])
rebulk.string(*videos, tags=['video'])
rebulk.string(*torrent, tags=['torrent'])
rebulk.string(*nzb, tags=['nzb'])

View file

@ -7,41 +7,50 @@ country property
import babelfish
from rebulk import Rebulk
from ..common.words import COMMON_WORDS, iter_words
from ..common.pattern import is_disabled
from ..common.words import iter_words
def country():
def country(config, common_words):
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:param common_words: common words
:type common_words: set
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().defaults(name='country')
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'country'))
rebulk = rebulk.defaults(name='country')
def find_countries(string, context=None):
"""
Find countries in given string.
"""
allowed_countries = context.get('allowed_countries') if context else None
return CountryFinder(allowed_countries, common_words).find(string)
rebulk.functional(find_countries,
#  Prefer language and any other property over country if not US or GB.
conflict_solver=lambda match, other: match
if other.name != 'language' or match.value not in [babelfish.Country('US'),
babelfish.Country('GB')]
if other.name != 'language' or match.value not in (babelfish.Country('US'),
babelfish.Country('GB'))
else other,
properties={'country': [None]})
properties={'country': [None]},
disabled=lambda context: not context.get('allowed_countries'))
babelfish.country_converters['guessit'] = GuessitCountryConverter(config['synonyms'])
return rebulk
COUNTRIES_SYN = {'ES': ['españa'],
'GB': ['UK'],
'BR': ['brazilian', 'bra'],
'CA': ['québec', 'quebec', 'qc'],
# FIXME: this one is a bit of a stretch, not sure how to do it properly, though...
'MX': ['Latinoamérica', 'latin america']}
class GuessitCountryConverter(babelfish.CountryReverseConverter): # pylint: disable=missing-docstring
def __init__(self):
def __init__(self, synonyms):
self.guessit_exceptions = {}
for alpha2, synlist in COUNTRIES_SYN.items():
for alpha2, synlist in synonyms.items():
for syn in synlist:
self.guessit_exceptions[syn.lower()] = alpha2
@ -78,32 +87,28 @@ class GuessitCountryConverter(babelfish.CountryReverseConverter): # pylint: dis
raise babelfish.CountryReverseError(name)
babelfish.country_converters['guessit'] = GuessitCountryConverter()
class CountryFinder(object):
"""Helper class to search and return country matches."""
def __init__(self, allowed_countries, common_words):
self.allowed_countries = {l.lower() for l in allowed_countries or []}
self.common_words = common_words
def is_allowed_country(country_object, context=None):
"""
Check if country is allowed.
"""
if context and context.get('allowed_countries'):
allowed_countries = context.get('allowed_countries')
return country_object.name.lower() in allowed_countries or country_object.alpha2.lower() in allowed_countries
return True
def find(self, string):
"""Return all matches for country."""
for word_match in iter_words(string.strip().lower()):
word = word_match.value
if word.lower() in self.common_words:
continue
try:
country_object = babelfish.Country.fromguessit(word)
if (country_object.name.lower() in self.allowed_countries or
country_object.alpha2.lower() in self.allowed_countries):
yield self._to_rebulk_match(word_match, country_object)
except babelfish.Error:
continue
def find_countries(string, context=None):
"""
Find countries in given string.
"""
ret = []
for word_match in iter_words(string.strip().lower()):
word = word_match.value
if word.lower() in COMMON_WORDS:
continue
try:
country_object = babelfish.Country.fromguessit(word)
if is_allowed_country(country_object, context):
ret.append((word_match.span[0], word_match.span[1], {'value': country_object}))
except babelfish.Error:
continue
return ret
@classmethod
def _to_rebulk_match(cls, word, value):
return word.span[0], word.span[1], {'value': value}

View file

@ -6,20 +6,25 @@ crc and uuid properties
from rebulk.remodule import re
from rebulk import Rebulk
from ..common.pattern import is_disabled
from ..common.validators import seps_surround
def crc():
def crc(config): # pylint:disable=unused-argument
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE)
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'crc32'))
rebulk = rebulk.regex_defaults(flags=re.IGNORECASE)
rebulk.defaults(validator=seps_surround)
rebulk.regex('(?:[a-fA-F]|[0-9]){8}', name='crc32',
conflict_solver=lambda match, other: match
conflict_solver=lambda match, other: other
if other.name in ['episode', 'season']
else '__default__')

View file

@ -6,21 +6,29 @@ date and year properties
from rebulk import Rebulk, RemoveMatch, Rule
from ..common.date import search_date, valid_year
from ..common.pattern import is_disabled
from ..common.validators import seps_surround
def date():
def date(config): # pylint:disable=unused-argument
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().defaults(validator=seps_surround)
rebulk.regex(r"\d{4}", name="year", formatter=int,
disabled=lambda context: is_disabled(context, 'year'),
conflict_solver=lambda match, other: other
if other.name in ('episode', 'season') and len(other.raw) < len(match.raw)
else '__default__',
validator=lambda match: seps_surround(match) and valid_year(match.value))
def date_functional(string, context):
def date_functional(string, context): # pylint:disable=inconsistent-return-statements
"""
Search for date in the string and retrieves match
@ -33,8 +41,9 @@ def date():
return ret[0], ret[1], {'value': ret[2]}
rebulk.functional(date_functional, name="date", properties={'date': [None]},
disabled=lambda context: is_disabled(context, 'date'),
conflict_solver=lambda match, other: other
if other.name in ['episode', 'season']
if other.name in ('episode', 'season', 'crc32')
else '__default__')
rebulk.rules(KeepMarkedYearInFilepart)
@ -49,6 +58,9 @@ class KeepMarkedYearInFilepart(Rule):
priority = 64
consequence = RemoveMatch
def enabled(self, context):
return not is_disabled(context, 'year')
def when(self, matches, context):
ret = []
if len(matches.named('year')) > 1:

View file

@ -7,28 +7,34 @@ from rebulk.remodule import re
from rebulk import Rebulk
from ..common import dash
from ..common.pattern import is_disabled
from ..common.validators import seps_surround
def edition():
def edition(config): # pylint:disable=unused-argument
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'edition'))
rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
rebulk.defaults(name='edition', validator=seps_surround)
rebulk.regex('collector', 'collector-edition', 'edition-collector', value='Collector Edition')
rebulk.regex('special-edition', 'edition-special', value='Special Edition',
rebulk.regex('collector', "collector'?s?-edition", 'edition-collector', value='Collector')
rebulk.regex('special-edition', 'edition-special', value='Special',
conflict_solver=lambda match, other: other
if other.name == 'episode_details' and other.value == 'Special'
else '__default__')
rebulk.string('se', value='Special Edition', tags='has-neighbor')
rebulk.regex('criterion-edition', 'edition-criterion', value='Criterion Edition')
rebulk.regex('deluxe', 'deluxe-edition', 'edition-deluxe', value='Deluxe Edition')
rebulk.regex('limited', 'limited-edition', value='Limited Edition', tags=['has-neighbor', 'release-group-prefix'])
rebulk.regex(r'theatrical-cut', r'theatrical-edition', r'theatrical', value='Theatrical Edition')
rebulk.string('se', value='Special', tags='has-neighbor')
rebulk.string('ddc', value="Director's Definitive Cut")
rebulk.regex('criterion-edition', 'edition-criterion', 'CC', value='Criterion')
rebulk.regex('deluxe', 'deluxe-edition', 'edition-deluxe', value='Deluxe')
rebulk.regex('limited', 'limited-edition', value='Limited', tags=['has-neighbor', 'release-group-prefix'])
rebulk.regex(r'theatrical-cut', r'theatrical-edition', r'theatrical', value='Theatrical')
rebulk.regex(r"director'?s?-cut", r"director'?s?-cut-edition", r"edition-director'?s?-cut", 'DC',
value="Director's Cut")
rebulk.regex('extended', 'extended-?cut', 'extended-?version',
@ -37,5 +43,10 @@ def edition():
for value in ('Remastered', 'Uncensored', 'Uncut', 'Unrated'):
rebulk.string(value, value=value, tags=['has-neighbor', 'release-group-prefix'])
rebulk.string('Festival', value='Festival', tags=['has-neighbor-before', 'has-neighbor-after'])
rebulk.regex('imax', 'imax-edition', value='IMAX')
rebulk.regex('fan-edit(?:ion)?', 'fan-collection', value='Fan')
rebulk.regex('ultimate-edition', value='Ultimate')
rebulk.regex("ultimate-collector'?s?-edition", value=['Ultimate', 'Collector'])
rebulk.regex('ultimate-fan-edit(?:ion)?', 'ultimate-fan-collection', value=['Ultimate', 'Fan'])
return rebulk

View file

@ -9,26 +9,32 @@ from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch, RenameMatch, POST_PRO
from ..common import seps, title_seps
from ..common.formatters import cleanup
from ..common.pattern import is_disabled
from ..common.validators import or_
from ..properties.title import TitleFromPosition, TitleBaseRule
from ..properties.type import TypeProcessor
def episode_title():
def episode_title(config): # pylint:disable=unused-argument
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
previous_names = ('episode', 'episode_details', 'episode_count',
previous_names = ('episode', 'episode_count',
'season', 'season_count', 'date', 'title', 'year')
rebulk = Rebulk().rules(RemoveConflictsWithEpisodeTitle(previous_names),
EpisodeTitleFromPosition(previous_names),
AlternativeTitleReplace(previous_names),
TitleToEpisodeTitle,
Filepart3EpisodeTitle,
Filepart2EpisodeTitle,
RenameEpisodeTitleWhenMovieType)
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'episode_title'))
rebulk = rebulk.rules(RemoveConflictsWithEpisodeTitle(previous_names),
EpisodeTitleFromPosition(previous_names),
AlternativeTitleReplace(previous_names),
TitleToEpisodeTitle,
Filepart3EpisodeTitle,
Filepart2EpisodeTitle,
RenameEpisodeTitleWhenMovieType)
return rebulk
@ -43,7 +49,7 @@ class RemoveConflictsWithEpisodeTitle(Rule):
def __init__(self, previous_names):
super(RemoveConflictsWithEpisodeTitle, self).__init__()
self.previous_names = previous_names
self.next_names = ('streaming_service', 'screen_size', 'format',
self.next_names = ('streaming_service', 'screen_size', 'source',
'video_codec', 'audio_codec', 'other', 'container')
self.affected_if_holes_after = ('part', )
self.affected_names = ('part', 'year')
@ -53,13 +59,11 @@ class RemoveConflictsWithEpisodeTitle(Rule):
for filepart in matches.markers.named('path'):
for match in matches.range(filepart.start, filepart.end,
predicate=lambda m: m.name in self.affected_names):
before = matches.previous(match, index=0,
predicate=lambda m, fp=filepart: not m.private and m.start >= fp.start)
before = matches.range(filepart.start, match.start, predicate=lambda m: not m.private, index=-1)
if not before or before.name not in self.previous_names:
continue
after = matches.next(match, index=0,
predicate=lambda m, fp=filepart: not m.private and m.end <= fp.end)
after = matches.range(match.end, filepart.end, predicate=lambda m: not m.private, index=0)
if not after or after.name not in self.next_names:
continue
@ -100,16 +104,15 @@ class TitleToEpisodeTitle(Rule):
for title in titles:
title_groups[title.value].append(title)
if len(title_groups) < 2:
return
episode_titles = []
if len(title_groups) < 2:
return episode_titles
for title in titles:
if matches.previous(title, lambda match: match.name == 'episode'):
episode_titles.append(title)
if episode_titles:
return episode_titles
return episode_titles
def then(self, matches, when_response, context):
for title in when_response:
@ -131,8 +134,7 @@ class EpisodeTitleFromPosition(TitleBaseRule):
def hole_filter(self, hole, matches):
episode = matches.previous(hole,
lambda previous: any(name in previous.names
for name in self.previous_names),
lambda previous: previous.named(*self.previous_names),
0)
crc32 = matches.named('crc32')
@ -150,7 +152,7 @@ class EpisodeTitleFromPosition(TitleBaseRule):
return False
return super(EpisodeTitleFromPosition, self).should_remove(match, matches, filepart, hole, context)
def when(self, matches, context):
def when(self, matches, context): # pylint:disable=inconsistent-return-statements
if matches.named('episode_title'):
return
return super(EpisodeTitleFromPosition, self).when(matches, context)
@ -167,7 +169,7 @@ class AlternativeTitleReplace(Rule):
super(AlternativeTitleReplace, self).__init__()
self.previous_names = previous_names
def when(self, matches, context):
def when(self, matches, context): # pylint:disable=inconsistent-return-statements
if matches.named('episode_title'):
return
@ -177,8 +179,7 @@ class AlternativeTitleReplace(Rule):
predicate=lambda match: 'title' in match.tags, index=0)
if main_title:
episode = matches.previous(main_title,
lambda previous: any(name in previous.names
for name in self.previous_names),
lambda previous: previous.named(*self.previous_names),
0)
crc32 = matches.named('crc32')
@ -202,7 +203,7 @@ class RenameEpisodeTitleWhenMovieType(Rule):
dependency = TypeProcessor
consequence = RenameMatch
def when(self, matches, context):
def when(self, matches, context): # pylint:disable=inconsistent-return-statements
if matches.named('episode_title', lambda m: 'alternative-replaced' not in m.tags) \
and not matches.named('type', lambda m: m.value == 'episode'):
return matches.named('episode_title')
@ -221,12 +222,18 @@ class Filepart3EpisodeTitle(Rule):
Serie name/SO1/E01-episode_title.mkv
AAAAAAAAAA/BBB/CCCCCCCCCCCCCCCCCCCC
Serie name/SO1/episode_title-E01.mkv
AAAAAAAAAA/BBB/CCCCCCCCCCCCCCCCCCCC
If CCCC contains episode and BBB contains seasonNumber
Then title is to be found in AAAA.
"""
consequence = AppendMatch('title')
def when(self, matches, context):
def when(self, matches, context): # pylint:disable=inconsistent-return-statements
if matches.tagged('filepart-title'):
return
fileparts = matches.markers.named('path')
if len(fileparts) < 3:
return
@ -241,6 +248,7 @@ class Filepart3EpisodeTitle(Rule):
if season:
hole = matches.holes(subdirectory.start, subdirectory.end,
ignore=or_(lambda match: 'weak-episode' in match.tags, TitleBaseRule.is_ignored),
formatter=cleanup, seps=title_seps, predicate=lambda match: match.value,
index=0)
if hole:
@ -267,7 +275,10 @@ class Filepart2EpisodeTitle(Rule):
"""
consequence = AppendMatch('title')
def when(self, matches, context):
def when(self, matches, context): # pylint:disable=inconsistent-return-statements
if matches.tagged('filepart-title'):
return
fileparts = matches.markers.named('path')
if len(fileparts) < 2:
return
@ -280,7 +291,10 @@ class Filepart2EpisodeTitle(Rule):
season = (matches.range(directory.start, directory.end, lambda match: match.name == 'season', 0) or
matches.range(filename.start, filename.end, lambda match: match.name == 'season', 0))
if season:
hole = matches.holes(directory.start, directory.end, formatter=cleanup, seps=title_seps,
hole = matches.holes(directory.start, directory.end,
ignore=or_(lambda match: 'weak-episode' in match.tags, TitleBaseRule.is_ignored),
formatter=cleanup, seps=title_seps,
predicate=lambda match: match.value, index=0)
if hole:
hole.tags.append('filepart-title')
return hole

View file

@ -1,7 +1,7 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
episode, season, episode_count, season_count and episode_details properties
episode, season, disc, episode_count, season_count and episode_details properties
"""
import copy
from collections import defaultdict
@ -11,24 +11,30 @@ from rebulk.match import Match
from rebulk.remodule import re
from rebulk.utils import is_iterable
from guessit.rules import match_processors
from guessit.rules.common.numeral import parse_numeral, numeral
from .title import TitleFromPosition
from ..common import dash, alt_dash, seps
from ..common import dash, alt_dash, seps, seps_no_fs
from ..common.formatters import strip
from ..common.numeral import numeral, parse_numeral
from ..common.validators import compose, seps_surround, seps_before, int_coercable
from ..common.pattern import is_disabled
from ..common.validators import seps_surround, int_coercable, and_
from ...reutils import build_or_pattern
def episodes():
def episodes(config):
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
# pylint: disable=too-many-branches,too-many-statements,too-many-locals
rebulk = Rebulk()
rebulk.regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'])
def is_season_episode_disabled(context):
"""Whether season and episode rules should be enabled."""
return is_disabled(context, 'episode') or is_disabled(context, 'season')
def episodes_season_chain_breaker(matches):
"""
@ -39,16 +45,14 @@ def episodes():
:rtype:
"""
eps = matches.named('episode')
if len(eps) > 1 and abs(eps[-1].value - eps[-2].value) > 100:
if len(eps) > 1 and abs(eps[-1].value - eps[-2].value) > episode_max_range:
return True
seasons = matches.named('season')
if len(seasons) > 1 and abs(seasons[-1].value - seasons[-2].value) > 100:
if len(seasons) > 1 and abs(seasons[-1].value - seasons[-2].value) > season_max_range:
return True
return False
rebulk.chain_defaults(chain_breaker=episodes_season_chain_breaker)
def season_episode_conflict_solver(match, other):
"""
Conflict solver for episode/season patterns
@ -57,40 +61,25 @@ def episodes():
:param other:
:return:
"""
if match.name == 'episode' and other.name in \
['screen_size', 'video_codec', 'audio_codec', 'audio_channels', 'container', 'date', 'year'] \
and 'weak-audio_channels' not in other.tags:
return match
if match.name == 'season' and other.name in \
['screen_size', 'video_codec', 'audio_codec', 'audio_channels', 'container', 'date'] \
and 'weak-audio_channels' not in other.tags:
return match
if match.name in ['season', 'episode'] and other.name in ['season', 'episode'] \
and match.initiator != other.initiator:
if 'weak-episode' in match.tags or 'x' in match.initiator.raw.lower():
if match.name != other.name:
if match.name == 'episode' and other.name == 'year':
return match
if 'weak-episode' in other.tags or 'x' in other.initiator.raw.lower():
return other
if match.name in ('season', 'episode'):
if other.name in ('video_codec', 'audio_codec', 'container', 'date'):
return match
if (other.name == 'audio_channels' and 'weak-audio_channels' not in other.tags
and not match.initiator.children.named(match.name + 'Marker')) or (
other.name == 'screen_size' and not int_coercable(other.raw)):
return match
if other.name in ('season', 'episode') and match.initiator != other.initiator:
if (match.initiator.name in ('weak_episode', 'weak_duplicate')
and other.initiator.name in ('weak_episode', 'weak_duplicate')):
return '__default__'
for current in (match, other):
if 'weak-episode' in current.tags or 'x' in current.initiator.raw.lower():
return current
return '__default__'
season_episode_seps = []
season_episode_seps.extend(seps)
season_episode_seps.extend(['x', 'X', 'e', 'E'])
season_words = ['season', 'saison', 'seizoen', 'serie', 'seasons', 'saisons', 'series',
'tem', 'temp', 'temporada', 'temporadas', 'stagione']
episode_words = ['episode', 'episodes', 'eps', 'ep', 'episodio',
'episodios', 'capitulo', 'capitulos']
of_words = ['of', 'sur']
all_words = ['All']
season_markers = ["S"]
season_ep_markers = ["x"]
episode_markers = ["xE", "Ex", "EP", "E", "x"]
range_separators = ['-', '~', 'to', 'a']
weak_discrete_separators = list(sep for sep in seps if sep not in range_separators)
strong_discrete_separators = ['+', '&', 'and', 'et']
discrete_separators = strong_discrete_separators + weak_discrete_separators
def ordering_validator(match):
"""
Validator for season list. They should be in natural order to be validated.
@ -124,64 +113,18 @@ def episodes():
lambda m: m.name == property_name + 'Separator')
separator = match.children.previous(current_match,
lambda m: m.name == property_name + 'Separator', 0)
if separator.raw not in range_separators and separator.raw in weak_discrete_separators:
if not current_match.value - previous_match.value == 1:
valid = False
if separator.raw in strong_discrete_separators:
valid = True
break
if separator:
if separator.raw not in range_separators and separator.raw in weak_discrete_separators:
if not 0 < current_match.value - previous_match.value <= max_range_gap + 1:
valid = False
if separator.raw in strong_discrete_separators:
valid = True
break
previous_match = current_match
return valid
return is_consecutive('episode') and is_consecutive('season')
# S01E02, 01x02, S01S02S03
rebulk.chain(formatter={'season': int, 'episode': int},
tags=['SxxExx'],
abbreviations=[alt_dash],
children=True,
private_parent=True,
validate_all=True,
validator={'__parent__': ordering_validator},
conflict_solver=season_episode_conflict_solver) \
.regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)@?' +
build_or_pattern(episode_markers, name='episodeMarker') + r'@?(?P<episode>\d+)',
validate_all=True,
validator={'__parent__': seps_before}).repeater('+') \
.regex(build_or_pattern(episode_markers + discrete_separators + range_separators,
name='episodeSeparator',
escape=True) +
r'(?P<episode>\d+)').repeater('*') \
.chain() \
.regex(r'(?P<season>\d+)@?' +
build_or_pattern(season_ep_markers, name='episodeMarker') +
r'@?(?P<episode>\d+)',
validate_all=True,
validator={'__parent__': seps_before}) \
.chain() \
.regex(r'(?P<season>\d+)@?' +
build_or_pattern(season_ep_markers, name='episodeMarker') +
r'@?(?P<episode>\d+)',
validate_all=True,
validator={'__parent__': seps_before}) \
.regex(build_or_pattern(season_ep_markers + discrete_separators + range_separators,
name='episodeSeparator',
escape=True) +
r'(?P<episode>\d+)').repeater('*') \
.chain() \
.regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)',
validate_all=True,
validator={'__parent__': seps_before}) \
.regex(build_or_pattern(season_markers + discrete_separators + range_separators,
name='seasonSeparator',
escape=True) +
r'(?P<season>\d+)').repeater('*')
# episode_details property
for episode_detail in ('Special', 'Bonus', 'Omake', 'Ova', 'Oav', 'Pilot', 'Unaired'):
rebulk.string(episode_detail, value=episode_detail, name='episode_details')
rebulk.regex(r'Extras?', name='episode_details', value='Extras')
def validate_roman(match):
"""
Validate a roman match if surrounded by separators
@ -194,110 +137,204 @@ def episodes():
return True
return seps_surround(match)
season_words = config['season_words']
episode_words = config['episode_words']
of_words = config['of_words']
all_words = config['all_words']
season_markers = config['season_markers']
season_ep_markers = config['season_ep_markers']
disc_markers = config['disc_markers']
episode_markers = config['episode_markers']
range_separators = config['range_separators']
weak_discrete_separators = list(sep for sep in seps_no_fs if sep not in range_separators)
strong_discrete_separators = config['discrete_separators']
discrete_separators = strong_discrete_separators + weak_discrete_separators
episode_max_range = config['episode_max_range']
season_max_range = config['season_max_range']
max_range_gap = config['max_range_gap']
rebulk = Rebulk() \
.regex_defaults(flags=re.IGNORECASE) \
.string_defaults(ignore_case=True) \
.chain_defaults(chain_breaker=episodes_season_chain_breaker) \
.defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'],
formatter={'season': int, 'episode': int, 'version': int, 'count': int},
children=True,
private_parent=True,
conflict_solver=season_episode_conflict_solver,
abbreviations=[alt_dash])
# S01E02, 01x02, S01S02S03
rebulk.chain(
tags=['SxxExx'],
validate_all=True,
validator={'__parent__': and_(seps_surround, ordering_validator)},
disabled=is_season_episode_disabled) \
.defaults(tags=['SxxExx']) \
.regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)@?' +
build_or_pattern(episode_markers + disc_markers, name='episodeMarker') + r'@?(?P<episode>\d+)')\
.repeater('+') \
.regex(build_or_pattern(episode_markers + disc_markers + discrete_separators + range_separators,
name='episodeSeparator',
escape=True) +
r'(?P<episode>\d+)').repeater('*')
rebulk.chain(tags=['SxxExx'],
validate_all=True,
validator={'__parent__': and_(seps_surround, ordering_validator)},
disabled=is_season_episode_disabled) \
.defaults(tags=['SxxExx']) \
.regex(r'(?P<season>\d+)@?' +
build_or_pattern(season_ep_markers, name='episodeMarker') +
r'@?(?P<episode>\d+)').repeater('+') \
rebulk.chain(tags=['SxxExx'],
validate_all=True,
validator={'__parent__': and_(seps_surround, ordering_validator)},
disabled=is_season_episode_disabled) \
.defaults(tags=['SxxExx']) \
.regex(r'(?P<season>\d+)@?' +
build_or_pattern(season_ep_markers, name='episodeMarker') +
r'@?(?P<episode>\d+)') \
.regex(build_or_pattern(season_ep_markers + discrete_separators + range_separators,
name='episodeSeparator',
escape=True) +
r'(?P<episode>\d+)').repeater('*')
rebulk.chain(tags=['SxxExx'],
validate_all=True,
validator={'__parent__': and_(seps_surround, ordering_validator)},
disabled=is_season_episode_disabled) \
.defaults(tags=['SxxExx']) \
.regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)') \
.regex('(?P<other>Extras)', name='other', value='Extras', tags=['no-release-group-prefix']).repeater('?') \
.regex(build_or_pattern(season_markers + discrete_separators + range_separators,
name='seasonSeparator',
escape=True) +
r'(?P<season>\d+)').repeater('*')
# episode_details property
for episode_detail in ('Special', 'Pilot', 'Unaired', 'Final'):
rebulk.string(episode_detail,
private_parent=False,
children=False,
value=episode_detail,
name='episode_details',
disabled=lambda context: is_disabled(context, 'episode_details'))
rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'],
validate_all=True, validator={'__parent__': seps_surround}, children=True, private_parent=True,
validate_all=True,
validator={'__parent__': and_(seps_surround, ordering_validator)},
children=True,
private_parent=True,
conflict_solver=season_episode_conflict_solver)
rebulk.chain(abbreviations=[alt_dash],
rebulk.chain(validate_all=True,
conflict_solver=season_episode_conflict_solver,
formatter={'season': parse_numeral, 'count': parse_numeral},
validator={'__parent__': compose(seps_surround, ordering_validator),
validator={'__parent__': and_(seps_surround, ordering_validator),
'season': validate_roman,
'count': validate_roman}) \
.defaults(validator=None) \
'count': validate_roman},
disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'season')) \
.defaults(formatter={'season': parse_numeral, 'count': parse_numeral},
validator={'season': validate_roman, 'count': validate_roman},
conflict_solver=season_episode_conflict_solver) \
.regex(build_or_pattern(season_words, name='seasonMarker') + '@?(?P<season>' + numeral + ')') \
.regex(r'' + build_or_pattern(of_words) + '@?(?P<count>' + numeral + ')').repeater('?') \
.regex(r'@?' + build_or_pattern(range_separators + discrete_separators + ['@'],
name='seasonSeparator', escape=True) +
r'@?(?P<season>\d+)').repeater('*')
rebulk.defaults(abbreviations=[dash])
rebulk.regex(build_or_pattern(episode_words, name='episodeMarker') + r'-?(?P<episode>\d+)' +
r'(?:v(?P<version>\d+))?' +
r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?', # Episode 4
abbreviations=[dash], formatter={'episode': int, 'version': int, 'count': int},
disabled=lambda context: context.get('type') == 'episode')
disabled=lambda context: context.get('type') == 'episode' or is_disabled(context, 'episode'))
rebulk.regex(build_or_pattern(episode_words, name='episodeMarker') + r'-?(?P<episode>' + numeral + ')' +
r'(?:v(?P<version>\d+))?' +
r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?', # Episode 4
abbreviations=[dash],
validator={'episode': validate_roman},
formatter={'episode': parse_numeral, 'version': int, 'count': int},
disabled=lambda context: context.get('type') != 'episode')
formatter={'episode': parse_numeral},
disabled=lambda context: context.get('type') != 'episode' or is_disabled(context, 'episode'))
rebulk.regex(r'S?(?P<season>\d+)-?(?:xE|Ex|E|x)-?(?P<other>' + build_or_pattern(all_words) + ')',
tags=['SxxExx'],
abbreviations=[dash],
validator=None,
formatter={'season': int, 'other': lambda match: 'Complete'})
formatter={'other': lambda match: 'Complete'},
disabled=lambda context: is_disabled(context, 'season'))
# 12, 13
rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode'], formatter={'episode': int, 'version': int},
disabled=lambda context: context.get('type') == 'movie') \
.defaults(validator=None) \
rebulk.chain(tags=['weak-episode'],
disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
.defaults(validator=None, tags=['weak-episode']) \
.regex(r'(?P<episode>\d{2})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{2})').repeater('*')
.regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{2})', abbreviations=None).repeater('*')
# 012, 013
rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode'], formatter={'episode': int, 'version': int},
disabled=lambda context: context.get('type') == 'movie') \
.defaults(validator=None) \
rebulk.chain(tags=['weak-episode'],
disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
.defaults(validator=None, tags=['weak-episode']) \
.regex(r'0(?P<episode>\d{1,2})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>[x-])0(?P<episode>\d{1,2})').repeater('*')
.regex(r'(?P<episodeSeparator>[x-])0(?P<episode>\d{1,2})', abbreviations=None).repeater('*')
# 112, 113
rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode'], formatter={'episode': int, 'version': int},
disabled=lambda context: (not context.get('episode_prefer_number', False) or
context.get('type') == 'movie')) \
.defaults(validator=None) \
rebulk.chain(tags=['weak-episode'],
name='weak_episode',
disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
.defaults(validator=None, tags=['weak-episode'], name='weak_episode') \
.regex(r'(?P<episode>\d{3,4})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{3,4})').repeater('*')
.regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{3,4})', abbreviations=None).repeater('*')
# 1, 2, 3
rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode'], formatter={'episode': int, 'version': int},
disabled=lambda context: context.get('type') != 'episode') \
.defaults(validator=None) \
rebulk.chain(tags=['weak-episode'],
disabled=lambda context: context.get('type') != 'episode' or is_disabled(context, 'episode')) \
.defaults(validator=None, tags=['weak-episode']) \
.regex(r'(?P<episode>\d)') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{1,2})').repeater('*')
.regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{1,2})', abbreviations=None).repeater('*')
# e112, e113
# TODO: Enhance rebulk for validator to be used globally (season_episode_validator)
rebulk.chain(formatter={'episode': int, 'version': int}) \
# e112, e113, 1e18, 3e19
rebulk.chain(disabled=lambda context: is_disabled(context, 'episode')) \
.defaults(validator=None) \
.regex(r'(?P<episodeMarker>e)(?P<episode>\d{1,4})') \
.regex(r'(?P<season>\d{1,2})?(?P<episodeMarker>e)(?P<episode>\d{1,4})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('*')
.regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})', abbreviations=None).repeater('*')
# ep 112, ep113, ep112, ep113
rebulk.chain(abbreviations=[dash], formatter={'episode': int, 'version': int}) \
rebulk.chain(disabled=lambda context: is_disabled(context, 'episode')) \
.defaults(validator=None) \
.regex(r'ep-?(?P<episode>\d{1,4})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>ep|e|x|-)(?P<episode>\d{1,4})').repeater('*')
.regex(r'(?P<episodeSeparator>ep|e|x|-)(?P<episode>\d{1,4})', abbreviations=None).repeater('*')
# cap 112, cap 112_114
rebulk.chain(abbreviations=[dash],
tags=['see-pattern'],
formatter={'season': int, 'episode': int}) \
.defaults(validator=None) \
rebulk.chain(tags=['see-pattern'],
disabled=is_season_episode_disabled) \
.defaults(validator=None, tags=['see-pattern']) \
.regex(r'(?P<seasonMarker>cap)-?(?P<season>\d{1,2})(?P<episode>\d{2})') \
.regex(r'(?P<episodeSeparator>-)(?P<season>\d{1,2})(?P<episode>\d{2})').repeater('?')
# 102, 0102
rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode', 'weak-duplicate'],
formatter={'season': int, 'episode': int, 'version': int},
conflict_solver=lambda match, other: match if other.name == 'year' else '__default__',
rebulk.chain(tags=['weak-episode', 'weak-duplicate'],
name='weak_duplicate',
conflict_solver=season_episode_conflict_solver,
disabled=lambda context: (context.get('episode_prefer_number', False) or
context.get('type') == 'movie')) \
.defaults(validator=None) \
context.get('type') == 'movie') or is_season_episode_disabled(context)) \
.defaults(tags=['weak-episode', 'weak-duplicate'],
name='weak_duplicate',
validator=None,
conflict_solver=season_episode_conflict_solver) \
.regex(r'(?P<season>\d{1,2})(?P<episode>\d{2})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>x|-)(?P<episode>\d{2})').repeater('*')
.regex(r'(?P<episodeSeparator>x|-)(?P<episode>\d{2})', abbreviations=None).repeater('*')
rebulk.regex(r'v(?P<version>\d+)', children=True, private_parent=True, formatter=int)
rebulk.regex(r'v(?P<version>\d+)',
formatter=int,
disabled=lambda context: is_disabled(context, 'version'))
rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'])
@ -305,19 +342,107 @@ def episodes():
# detached of X count (season/episode)
rebulk.regex(r'(?P<episode>\d+)-?' + build_or_pattern(of_words) +
r'-?(?P<count>\d+)-?' + build_or_pattern(episode_words) + '?',
abbreviations=[dash], children=True, private_parent=True, formatter=int)
formatter=int,
pre_match_processor=match_processors.strip,
disabled=lambda context: is_disabled(context, 'episode'))
rebulk.regex(r'Minisodes?', name='episode_format', value="Minisode")
rebulk.regex(r'Minisodes?',
children=False,
private_parent=False,
name='episode_format',
value="Minisode",
disabled=lambda context: is_disabled(context, 'episode_format'))
rebulk.rules(RemoveInvalidSeason, RemoveInvalidEpisode,
SeePatternRange(range_separators + ['_']), EpisodeNumberSeparatorRange(range_separators),
SeasonSeparatorRange(range_separators), RemoveWeakIfMovie, RemoveWeakIfSxxExx,
RemoveWeakDuplicate, EpisodeDetailValidator, RemoveDetachedEpisodeNumber, VersionValidator,
CountValidator, EpisodeSingleDigitValidator)
rebulk.rules(WeakConflictSolver, RemoveInvalidSeason, RemoveInvalidEpisode,
SeePatternRange(range_separators + ['_']),
EpisodeNumberSeparatorRange(range_separators),
SeasonSeparatorRange(range_separators), RemoveWeakIfMovie, RemoveWeakIfSxxExx, RemoveWeakDuplicate,
EpisodeDetailValidator, RemoveDetachedEpisodeNumber, VersionValidator, RemoveWeak(episode_words),
RenameToAbsoluteEpisode, CountValidator, EpisodeSingleDigitValidator, RenameToDiscMatch)
return rebulk
class WeakConflictSolver(Rule):
    """
    Rule to decide whether weak-episode or weak-duplicate matches should be kept.

    If an anime is detected:
    - weak-duplicate matches should be removed
    - weak-episode matches should be tagged as anime
    Otherwise:
    - weak-episode matches are removed unless they're part of an episode range match.
    """
    # Run early (high priority) so downstream episode rules see the cleaned-up match set.
    priority = 128
    # when() returns a (to_remove, to_append) pair consumed in this order.
    consequence = [RemoveMatch, AppendMatch]

    def enabled(self, context):
        # Weak episode/duplicate disambiguation is irrelevant when the caller
        # forced (or guessed) type 'movie'.
        return context.get('type') != 'movie'

    @classmethod
    def is_anime(cls, matches):
        """Return True if it seems to be an anime.

        Anime characteristics:
        - version, crc32 matches
        - screen_size inside brackets
        - release_group at start and inside brackets
        """
        # Release version tags (v2) and CRC32 checksums are anime fansub conventions.
        if matches.named('version') or matches.named('crc32'):
            return True
        for group in matches.markers.named('group'):
            # A screen_size inside a bracketed group, e.g. "[1080p]".
            if matches.range(group.start, group.end, predicate=lambda m: m.name == 'screen_size'):
                return True
            # A bracketed group at the very start of a path part whose whole
            # content is a single unidentified hole — typical "[FanSubGroup] Title" layout.
            if matches.markers.starting(group.start, predicate=lambda m: m.name == 'path'):
                hole = matches.holes(group.start, group.end, index=0)
                if hole and hole.raw == group.raw:
                    return True
        return False

    def when(self, matches, context):
        to_remove = []
        to_append = []
        anime_detected = self.is_anime(matches)
        # Resolve conflicts independently for each path component (directory / filename).
        for filepart in matches.markers.named('path'):
            weak_matches = matches.range(filepart.start, filepart.end, predicate=(
                lambda m: m.initiator.name == 'weak_episode'))
            weak_dup_matches = matches.range(filepart.start, filepart.end, predicate=(
                lambda m: m.initiator.name == 'weak_duplicate'))
            if anime_detected:
                if weak_matches:
                    # Anime: drop the season+episode 'weak_duplicate' interpretation and
                    # re-tag every remaining episode match as 'anime' (re-appended as a copy
                    # because tags are mutated).
                    to_remove.extend(weak_dup_matches)
                    for match in matches.range(filepart.start, filepart.end, predicate=(
                            lambda m: m.name == 'episode' and m.initiator.name != 'weak_duplicate')):
                        episode = copy.copy(match)
                        episode.tags = episode.tags + ['anime']
                        to_append.append(episode)
                        to_remove.append(match)
            elif weak_dup_matches:
                # Non-anime: a weak_episode only survives when it is an actual episode
                # range (it produced an episodeSeparator child, e.g. "12-14").
                episodes_in_range = matches.range(filepart.start, filepart.end, predicate=(
                    lambda m:
                    m.name == 'episode' and m.initiator.name == 'weak_episode'
                    and m.initiator.children.named('episodeSeparator')
                ))
                if not episodes_in_range and not matches.range(filepart.start, filepart.end,
                                                               predicate=lambda m: 'SxxExx' in m.tags):
                    # No range and no explicit SxxExx nearby: prefer the weak_duplicate
                    # (season+episode) reading and drop the bare weak episodes.
                    to_remove.extend(weak_matches)
                else:
                    # Keep the range episodes, stripped of their weak tags, and drop
                    # the conflicting weak_duplicate interpretation.
                    for match in episodes_in_range:
                        episode = copy.copy(match)
                        episode.tags = []
                        to_append.append(episode)
                        to_remove.append(match)

                    if to_append:
                        to_remove.extend(weak_dup_matches)

        if to_remove or to_append:
            return to_remove, to_append
        # Explicit False: no consequence should be applied.
        return False
class CountValidator(Rule):
"""
Validate count property and rename it
@ -341,7 +466,9 @@ class CountValidator(Rule):
season_count.append(count)
else:
to_remove.append(count)
return to_remove, episode_count, season_count
if to_remove or episode_count or season_count:
return to_remove, episode_count, season_count
return False
class SeePatternRange(Rule):
@ -376,7 +503,9 @@ class SeePatternRange(Rule):
to_remove.append(separator)
return to_remove, to_append
if to_remove or to_append:
return to_remove, to_append
return False
class AbstractSeparatorRange(Rule):
@ -396,14 +525,16 @@ class AbstractSeparatorRange(Rule):
to_append = []
for separator in matches.named(self.property_name + 'Separator'):
previous_match = matches.previous(separator, lambda match: match.name == self.property_name, 0)
next_match = matches.next(separator, lambda match: match.name == self.property_name, 0)
previous_match = matches.previous(separator, lambda m: m.name == self.property_name, 0)
next_match = matches.next(separator, lambda m: m.name == self.property_name, 0)
initiator = separator.initiator
if previous_match and next_match and separator.value in self.range_separators:
to_remove.append(next_match)
for episode_number in range(previous_match.value + 1, next_match.value):
match = copy.copy(next_match)
match.value = episode_number
initiator.children.append(match)
to_append.append(match)
to_append.append(next_match)
to_remove.append(separator)
@ -415,9 +546,11 @@ class AbstractSeparatorRange(Rule):
if separator not in self.range_separators:
separator = strip(separator)
if separator in self.range_separators:
initiator = previous_match.initiator
for episode_number in range(previous_match.value + 1, next_match.value):
match = copy.copy(next_match)
match.value = episode_number
initiator.children.append(match)
to_append.append(match)
to_append.append(Match(previous_match.end, next_match.start - 1,
name=self.property_name + 'Separator',
@ -428,15 +561,51 @@ class AbstractSeparatorRange(Rule):
previous_match = next_match
return to_remove, to_append
if to_remove or to_append:
return to_remove, to_append
return False
class RenameToAbsoluteEpisode(Rule):
    """
    Rename episode to absolute_episodes.

    Absolute episodes are only used if two groups of episodes are detected:
        S02E04-06 25-27
        25-27 S02E04-06
        2x04-06 25-27
        28. Anime Name S02E05
    The matches in the group with higher episode values are renamed to absolute_episode.
    """

    consequence = RenameMatch('absolute_episode')

    def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
        # Collect the initiators of multi-episode groups (an initiator with more
        # than one 'episode' child is a range like "04-06").
        initiators = {match.initiator for match in matches.named('episode')
                      if len(match.initiator.children.named('episode')) > 1}
        if len(initiators) != 2:
            # Not the two-range layout. Fall back to the "28. Anime Name S02E05"
            # case: a weak_episode sitting at the very start of a path part that
            # also contains another episode match later on is treated as absolute.
            ret = []
            for filepart in matches.markers.named('path'):
                if matches.range(filepart.start + 1, filepart.end, predicate=lambda m: m.name == 'episode'):
                    ret.extend(
                        matches.starting(filepart.start, predicate=lambda m: m.initiator.name == 'weak_episode'))
            return ret

        initiators = sorted(initiators, key=lambda item: item.end)
        # Only act when the two ranges are adjacent (nothing but separators between them).
        if not matches.holes(initiators[0].end, initiators[1].start, predicate=lambda m: m.raw.strip(seps)):
            first_range = matches.named('episode', predicate=lambda m: m.initiator == initiators[0])
            second_range = matches.named('episode', predicate=lambda m: m.initiator == initiators[1])
            # Ranges must pair up one-to-one; the range with the higher starting
            # value is the absolute-episode numbering. Equal values: no rename.
            if len(first_range) == len(second_range):
                if second_range[0].value > first_range[0].value:
                    return second_range
                if first_range[0].value > second_range[0].value:
                    return first_range
class EpisodeNumberSeparatorRange(AbstractSeparatorRange):
    """
    Remove separator matches and create matches for episode number ranges.
    """
    priority = 128
    consequence = [RemoveMatch, AppendMatch]
    def __init__(self, range_separators):
        # Delegate to AbstractSeparatorRange, expanding ranges of 'episode' matches.
        super(EpisodeNumberSeparatorRange, self).__init__(range_separators, "episode")
@ -446,8 +615,6 @@ class SeasonSeparatorRange(AbstractSeparatorRange):
"""
Remove separator matches and create matches for season range.
"""
priority = 128
consequence = [RemoveMatch, AppendMatch]
def __init__(self, range_separators):
super(SeasonSeparatorRange, self).__init__(range_separators, "season")
@ -455,7 +622,7 @@ class SeasonSeparatorRange(AbstractSeparatorRange):
class RemoveWeakIfMovie(Rule):
"""
Remove weak-movie tagged matches if it seems to be a movie.
Remove weak-episode tagged matches if it seems to be a movie.
"""
priority = 64
consequence = RemoveMatch
@ -471,19 +638,69 @@ class RemoveWeakIfMovie(Rule):
year = matches.range(filepart.start, filepart.end, predicate=lambda m: m.name == 'year', index=0)
if year:
remove = True
next_match = matches.next(year, predicate=lambda m, fp=filepart: m.private and m.end <= fp.end, index=0)
if next_match and not matches.at_match(next_match, predicate=lambda m: m.name == 'year'):
next_match = matches.range(year.end, filepart.end, predicate=lambda m: m.private, index=0)
if (next_match and not matches.holes(year.end, next_match.start, predicate=lambda m: m.raw.strip(seps))
and not matches.at_match(next_match, predicate=lambda m: m.name == 'year')):
to_ignore.add(next_match.initiator)
to_ignore.update(matches.range(filepart.start, filepart.end,
predicate=lambda m: len(m.children.named('episode')) > 1))
to_remove.extend(matches.conflicting(year))
if remove:
to_remove.extend(matches.tagged('weak-movie', predicate=lambda m: m.initiator not in to_ignore))
to_remove.extend(matches.tagged('weak-episode', predicate=(
lambda m: m.initiator not in to_ignore and 'anime' not in m.tags)))
return to_remove
class RemoveWeak(Rule):
    """
    Drop 'weak-episode' matches that directly follow a video, source or audio
    property match, optionally promoting the first one to a real 'episode'
    when the word immediately before it is an episode keyword (e.g. "ep 12").
    """
    priority = 16
    consequence = RemoveMatch, AppendMatch

    def __init__(self, episode_words):
        super(RemoveWeak, self).__init__()
        # Lowercase keywords that mark a genuine episode number.
        self.episode_words = episode_words

    def when(self, matches, context):
        removed = []
        appended = []
        anchor_names = ('audio_codec', 'screen_size', 'streaming_service', 'source',
                        'video_profile', 'audio_channels', 'audio_profile')
        for filepart in matches.markers.named('path'):
            weaks = matches.range(filepart.start, filepart.end,
                                  predicate=lambda m: 'weak-episode' in m.tags)
            if not weaks:
                continue
            first_weak = weaks[0]
            # Property match sitting right before the first weak episode.
            anchor = matches.previous(first_weak,
                                      predicate=lambda m: m.name in anchor_names,
                                      index=0)
            # Require the anchor to be adjacent (only separators in between).
            if not anchor or matches.holes(anchor.end, first_weak.start,
                                           predicate=lambda m: m.raw.strip(seps)):
                continue
            if anchor.raw.lower() in self.episode_words:
                try:
                    # Promote the weak match to a proper 'episode', extending
                    # its span to cover the episode keyword as well.
                    promoted = copy.copy(first_weak)
                    promoted.name = 'episode'
                    promoted.value = int(first_weak.value)
                    promoted.start = anchor.start
                    promoted.private = False
                    promoted.tags = []
                    appended.append(promoted)
                except ValueError:
                    # Non-integer value: do not promote, but still remove below.
                    pass
            removed.extend(weaks)
        if removed or appended:
            return removed, appended
        return False
class RemoveWeakIfSxxExx(Rule):
"""
Remove weak-movie tagged matches if SxxExx pattern is matched.
Remove weak-episode tagged matches if SxxExx pattern is matched.
Weak episodes at beginning of filepart are kept.
"""
priority = 64
consequence = RemoveMatch
@ -492,9 +709,10 @@ class RemoveWeakIfSxxExx(Rule):
to_remove = []
for filepart in matches.markers.named('path'):
if matches.range(filepart.start, filepart.end,
predicate=lambda match: not match.private and 'SxxExx' in match.tags):
to_remove.extend(matches.range(
filepart.start, filepart.end, predicate=lambda match: 'weak-movie' in match.tags))
predicate=lambda m: not m.private and 'SxxExx' in m.tags):
for match in matches.range(filepart.start, filepart.end, predicate=lambda m: 'weak-episode' in m.tags):
if match.start != filepart.start or match.initiator.name != 'weak_episode':
to_remove.append(match)
return to_remove
@ -575,7 +793,7 @@ class RemoveWeakDuplicate(Rule):
for filepart in matches.markers.named('path'):
patterns = defaultdict(list)
for match in reversed(matches.range(filepart.start, filepart.end,
predicate=lambda match: 'weak-duplicate' in match.tags)):
predicate=lambda m: 'weak-duplicate' in m.tags)):
if match.pattern in patterns[match.name]:
to_remove.append(match)
else:
@ -615,15 +833,15 @@ class RemoveDetachedEpisodeNumber(Rule):
episode_numbers = []
episode_values = set()
for match in matches.named('episode', lambda match: not match.private and 'weak-movie' in match.tags):
for match in matches.named('episode', lambda m: not m.private and 'weak-episode' in m.tags):
if match.value not in episode_values:
episode_numbers.append(match)
episode_values.add(match.value)
episode_numbers = list(sorted(episode_numbers, key=lambda match: match.value))
episode_numbers = list(sorted(episode_numbers, key=lambda m: m.value))
if len(episode_numbers) > 1 and \
episode_numbers[0].value < 10 and \
episode_numbers[1].value - episode_numbers[0].value != 1:
episode_numbers[0].value < 10 and \
episode_numbers[1].value - episode_numbers[0].value != 1:
parent = episode_numbers[0]
while parent: # TODO: Add a feature in rebulk to avoid this ...
ret.append(parent)
@ -664,3 +882,31 @@ class EpisodeSingleDigitValidator(Rule):
if not matches.range(*group.span, predicate=lambda match: match.name == 'title'):
ret.append(episode)
return ret
class RenameToDiscMatch(Rule):
    """
    Rename episodes detected with `d` episodeMarkers to `disc`.
    """
    consequence = [RenameMatch('disc'), RenameMatch('discMarker'), RemoveMatch]

    def when(self, matches, context):
        renamed_discs = []
        renamed_markers = []
        dropped = []
        disc_disabled = is_disabled(context, 'disc')
        for marker in matches.named('episodeMarker', predicate=lambda m: m.value.lower() == 'd'):
            if disc_disabled:
                # 'disc' property disabled: drop the marker and everything
                # its initiator produced instead of renaming.
                dropped.append(marker)
                dropped.extend(marker.initiator.children)
            else:
                renamed_markers.append(marker)
                renamed_discs.extend(sorted(marker.initiator.children.named('episode'),
                                            key=lambda m: m.value))
        if renamed_discs or renamed_markers or dropped:
            return renamed_discs, renamed_markers, dropped
        return False

View file

@ -7,10 +7,11 @@ from rebulk import Rebulk, AppendMatch, Rule
from rebulk.remodule import re
from ..common.formatters import cleanup
from ..common.pattern import is_disabled
from ..common.validators import seps_surround
def film():
def film(config): # pylint:disable=unused-argument
"""
Builder for rebulk object.
:return: Created Rebulk object
@ -18,7 +19,8 @@ def film():
"""
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, validate_all=True, validator={'__parent__': seps_surround})
rebulk.regex(r'f(\d{1,2})', name='film', private_parent=True, children=True, formatter=int)
rebulk.regex(r'f(\d{1,2})', name='film', private_parent=True, children=True, formatter=int,
disabled=lambda context: is_disabled(context, 'film'))
rebulk.rules(FilmTitleRule)
@ -33,7 +35,10 @@ class FilmTitleRule(Rule):
properties = {'film_title': [None]}
def when(self, matches, context):
def enabled(self, context):
return not is_disabled(context, 'film_title')
def when(self, matches, context): # pylint:disable=inconsistent-return-statements
bonus_number = matches.named('film', lambda match: not match.private, index=0)
if bonus_number:
filepath = matches.markers.at_match(bonus_number, lambda marker: marker.name == 'path', 0)

View file

@ -1,72 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
format property
"""
from rebulk.remodule import re
from rebulk import Rebulk, RemoveMatch, Rule
from ..common import dash
from ..common.validators import seps_before, seps_after
def format_():
    """
    Builder for rebulk object.

    Registers every recognized media 'format' pattern (VHS, DVD, HDTV,
    WEB-DL, BluRay, ...) plus the ValidateFormat rule.
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    # All patterns share the 'format' name and neighbor tags used by other rules.
    rebulk.defaults(name="format", tags=['video-codec-prefix', 'streaming_service.suffix'])
    rebulk.regex("VHS", "VHS-?Rip", value="VHS")
    rebulk.regex("CAM", "CAM-?Rip", "HD-?CAM", value="Cam")
    rebulk.regex("TELESYNC", "TS", "HD-?TS", value="Telesync")
    rebulk.regex("WORKPRINT", "WP", value="Workprint")
    rebulk.regex("TELECINE", "TC", value="Telecine")
    rebulk.regex("PPV", "PPV-?Rip", value="PPV")  # Pay Per View
    rebulk.regex("SD-?TV", "SD-?TV-?Rip", "Rip-?SD-?TV", "TV-?Rip",
                 "Rip-?TV", "TV-?(?=Dub)", value="TV")  # TV is too common to allow matching
    rebulk.regex("DVB-?Rip", "DVB", "PD-?TV", value="DVB")
    rebulk.regex("DVD", "DVD-?Rip", "VIDEO-?TS", "DVD-?R(?:$|(?!E))",  # "DVD-?R(?:$|^E)" => DVD-Real ...
                 "DVD-?9", "DVD-?5", value="DVD")
    rebulk.regex("HD-?TV", "TV-?RIP-?HD", "HD-?TV-?RIP", "HD-?RIP", value="HDTV",
                 conflict_solver=lambda match, other: other if other.name == 'other' else '__default__')
    rebulk.regex("VOD", "VOD-?Rip", value="VOD")
    rebulk.regex("WEB-?Rip", "WEB-?DL-?Rip", "WEB-?Cap", value="WEBRip")
    rebulk.regex("WEB-?DL", "WEB-?HD", "WEB", "DL-?WEB", "DL(?=-?Mux)", value="WEB-DL")
    rebulk.regex("HD-?DVD-?Rip", "HD-?DVD", value="HD-DVD")
    rebulk.regex("Blu-?ray(?:-?Rip)?", "B[DR]", "B[DR]-?Rip", "BD[59]", "BD25", "BD50", value="BluRay")
    rebulk.regex("AHDTV", value="AHDTV")
    rebulk.regex('UHD-?TV', 'UHD-?Rip', value='UHDTV',
                 conflict_solver=lambda match, other: other if other.name == 'other' else '__default__')
    rebulk.regex("HDTC", value="HDTC")
    rebulk.regex("DSR", "DSR?-?Rip", "SAT-?Rip", "DTH", "DTH-?Rip", value="SATRip")
    rebulk.rules(ValidateFormat)
    return rebulk
class ValidateFormat(Rule):
    """
    Validate format with screener property, with video_codec property or separated
    """
    priority = 64
    consequence = RemoveMatch
    def when(self, matches, context):
        ret = []
        for format_match in matches.named('format'):
            # Remove a format match lacking a separator before it, unless a
            # 'format-prefix' tagged match sits right before it.
            # NOTE(review): matches.range is called with start > end
            # (start - 1, start - 2) -- verify against rebulk's Matches.range
            # semantics; as written it looks like it can never match anything.
            if not seps_before(format_match) and \
                    not matches.range(format_match.start - 1, format_match.start - 2,
                                      lambda match: 'format-prefix' in match.tags):
                ret.append(format_match)
                continue
            # Likewise after: require a separator or a 'format-suffix' neighbor.
            if not seps_after(format_match) and \
                    not matches.range(format_match.end, format_match.end + 1,
                                      lambda match: 'format-suffix' in match.tags):
                ret.append(format_match)
                continue
        return ret

View file

@ -11,55 +11,82 @@ import babelfish
from rebulk import Rebulk, Rule, RemoveMatch, RenameMatch
from rebulk.remodule import re
from ..common.words import iter_words, COMMON_WORDS
from ..common import seps
from ..common.pattern import is_disabled
from ..common.words import iter_words
from ..common.validators import seps_surround
def language():
def language(config, common_words):
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:param common_words: common words
:type common_words: set
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk()
subtitle_both = config['subtitle_affixes']
subtitle_prefixes = sorted(subtitle_both + config['subtitle_prefixes'], key=length_comparator)
subtitle_suffixes = sorted(subtitle_both + config['subtitle_suffixes'], key=length_comparator)
lang_both = config['language_affixes']
lang_prefixes = sorted(lang_both + config['language_prefixes'], key=length_comparator)
lang_suffixes = sorted(lang_both + config['language_suffixes'], key=length_comparator)
weak_affixes = frozenset(config['weak_affixes'])
rebulk = Rebulk(disabled=lambda context: (is_disabled(context, 'language') and
is_disabled(context, 'subtitle_language')))
rebulk.string(*subtitle_prefixes, name="subtitle_language.prefix", ignore_case=True, private=True,
validator=seps_surround, tags=['release-group-prefix'])
validator=seps_surround, tags=['release-group-prefix'],
disabled=lambda context: is_disabled(context, 'subtitle_language'))
rebulk.string(*subtitle_suffixes, name="subtitle_language.suffix", ignore_case=True, private=True,
validator=seps_surround)
validator=seps_surround,
disabled=lambda context: is_disabled(context, 'subtitle_language'))
rebulk.string(*lang_suffixes, name="language.suffix", ignore_case=True, private=True,
validator=seps_surround, tags=['format-suffix'])
rebulk.functional(find_languages, properties={'language': [None]})
rebulk.rules(SubtitlePrefixLanguageRule, SubtitleSuffixLanguageRule, SubtitleExtensionRule)
validator=seps_surround, tags=['source-suffix'],
disabled=lambda context: is_disabled(context, 'language'))
def find_languages(string, context=None):
"""Find languages in the string
:return: list of tuple (property, Language, lang_word, word)
"""
return LanguageFinder(context, subtitle_prefixes, subtitle_suffixes,
lang_prefixes, lang_suffixes, weak_affixes).find(string)
rebulk.functional(find_languages,
properties={'language': [None]},
disabled=lambda context: not context.get('allowed_languages'))
rebulk.rules(SubtitleExtensionRule,
SubtitlePrefixLanguageRule,
SubtitleSuffixLanguageRule,
RemoveLanguage,
RemoveInvalidLanguages(common_words))
babelfish.language_converters['guessit'] = GuessitConverter(config['synonyms'])
return rebulk
COMMON_WORDS_STRICT = frozenset(['brazil'])
UNDETERMINED = babelfish.Language('und')
SYN = {('ell', None): ['gr', 'greek'],
('spa', None): ['esp', 'español', 'espanol'],
('fra', None): ['français', 'vf', 'vff', 'vfi', 'vfq'],
('swe', None): ['se'],
('por', 'BR'): ['po', 'pb', 'pob', 'ptbr', 'br', 'brazilian'],
('cat', None): ['català', 'castellano', 'espanol castellano', 'español castellano'],
('ces', None): ['cz'],
('ukr', None): ['ua'],
('zho', None): ['cn'],
('jpn', None): ['jp'],
('hrv', None): ['scr'],
('mul', None): ['multi', 'dl']} # http://scenelingo.wordpress.com/2009/03/24/what-does-dl-mean/
MULTIPLE = babelfish.Language('mul')
NON_SPECIFIC_LANGUAGES = frozenset([UNDETERMINED, MULTIPLE])
class GuessitConverter(babelfish.LanguageReverseConverter): # pylint: disable=missing-docstring
_with_country_regexp = re.compile(r'(.*)\((.*)\)')
_with_country_regexp2 = re.compile(r'(.*)-(.*)')
def __init__(self):
def __init__(self, synonyms):
self.guessit_exceptions = {}
for (alpha3, country), synlist in SYN.items():
for code, synlist in synonyms.items():
if '_' in code:
(alpha3, country) = code.split('_')
else:
(alpha3, country) = (code, None)
for syn in synlist:
self.guessit_exceptions[syn.lower()] = (alpha3, country, None)
@ -76,15 +103,7 @@ class GuessitConverter(babelfish.LanguageReverseConverter): # pylint: disable=m
return str(babelfish.Language(alpha3, country, script))
def reverse(self, name): # pylint:disable=arguments-differ
with_country = (GuessitConverter._with_country_regexp.match(name) or
GuessitConverter._with_country_regexp2.match(name))
name = name.lower()
if with_country:
lang = babelfish.Language.fromguessit(with_country.group(1).strip())
lang.country = babelfish.Country.fromguessit(with_country.group(2).strip())
return lang.alpha3, lang.country.alpha2 if lang.country else None, lang.script or None
# exceptions come first, as they need to override a potential match
# with any of the other guessers
try:
@ -96,7 +115,8 @@ class GuessitConverter(babelfish.LanguageReverseConverter): # pylint: disable=m
babelfish.Language.fromalpha3b,
babelfish.Language.fromalpha2,
babelfish.Language.fromname,
babelfish.Language.fromopensubtitles]:
babelfish.Language.fromopensubtitles,
babelfish.Language.fromietf]:
try:
reverse = conv(name)
return reverse.alpha3, reverse.country, reverse.script
@ -113,24 +133,6 @@ def length_comparator(value):
return len(value)
babelfish.language_converters['guessit'] = GuessitConverter()
subtitle_both = ['sub', 'subs', 'subbed', 'custom subbed', 'custom subs',
'custom sub', 'customsubbed', 'customsubs', 'customsub',
'soft subtitles', 'soft subs']
subtitle_prefixes = sorted(subtitle_both +
['st', 'vost', 'subforced', 'fansub', 'hardsub',
'legenda', 'legendas', 'legendado', 'subtitulado',
'soft', 'subtitles'], key=length_comparator)
subtitle_suffixes = sorted(subtitle_both +
['subforced', 'fansub', 'hardsub'], key=length_comparator)
lang_both = ['dublado', 'dubbed', 'dub']
lang_suffixes = sorted(lang_both + ['audio'], key=length_comparator)
lang_prefixes = sorted(lang_both + ['true'], key=length_comparator)
weak_prefixes = ('audio', 'true')
_LanguageMatch = namedtuple('_LanguageMatch', ['property_name', 'word', 'lang'])
@ -149,7 +151,7 @@ class LanguageWord(object):
self.next_word = next_word
@property
def extended_word(self):
def extended_word(self): # pylint:disable=inconsistent-return-statements
"""
Return the extended word for this instance, if any.
"""
@ -175,10 +177,17 @@ def to_rebulk_match(language_match):
end = word.end
name = language_match.property_name
if language_match.lang == UNDETERMINED:
return start, end, dict(name=name, value=word.value.lower(),
formatter=babelfish.Language, tags=['weak-language'])
return start, end, {
'name': name,
'value': word.value.lower(),
'formatter': babelfish.Language,
'tags': ['weak-language']
}
return start, end, dict(name=name, value=language_match.lang)
return start, end, {
'name': name,
'value': language_match.lang
}
class LanguageFinder(object):
@ -186,10 +195,21 @@ class LanguageFinder(object):
Helper class to search and return language matches: 'language' and 'subtitle_language' properties
"""
def __init__(self, allowed_languages):
self.parsed = dict()
self.allowed_languages = allowed_languages
self.common_words = COMMON_WORDS_STRICT if allowed_languages else COMMON_WORDS
def __init__(self, context,
subtitle_prefixes, subtitle_suffixes,
lang_prefixes, lang_suffixes, weak_affixes):
allowed_languages = context.get('allowed_languages') if context else None
self.allowed_languages = {l.lower() for l in allowed_languages or []}
self.weak_affixes = weak_affixes
self.prefixes_map = {}
self.suffixes_map = {}
if not is_disabled(context, 'subtitle_language'):
self.prefixes_map['subtitle_language'] = subtitle_prefixes
self.suffixes_map['subtitle_language'] = subtitle_suffixes
self.prefixes_map['language'] = lang_prefixes
self.suffixes_map['language'] = lang_suffixes
def find(self, string):
"""
@ -250,11 +270,11 @@ class LanguageFinder(object):
"""
tuples = [
(language_word, language_word.next_word,
dict(subtitle_language=subtitle_prefixes, language=lang_prefixes),
self.prefixes_map,
lambda string, prefix: string.startswith(prefix),
lambda string, prefix: string[len(prefix):]),
(language_word.next_word, language_word,
dict(subtitle_language=subtitle_suffixes, language=lang_suffixes),
self.suffixes_map,
lambda string, suffix: string.endswith(suffix),
lambda string, suffix: string[:len(string) - len(suffix)])
]
@ -271,7 +291,7 @@ class LanguageFinder(object):
if match:
yield match
def find_match_for_word(self, word, fallback_word, affixes, is_affix, strip_affix):
def find_match_for_word(self, word, fallback_word, affixes, is_affix, strip_affix): # pylint:disable=inconsistent-return-statements
"""
Return the language match for the given word and affixes.
"""
@ -280,8 +300,6 @@ class LanguageFinder(object):
continue
word_lang = current_word.value.lower()
if word_lang in self.common_words:
continue
for key, parts in affixes.items():
for part in parts:
@ -291,30 +309,31 @@ class LanguageFinder(object):
match = None
value = strip_affix(word_lang, part)
if not value:
if fallback_word:
match = self.find_language_match_for_word(fallback_word, key=key, force=True)
if fallback_word and (
abs(fallback_word.start - word.end) <= 1 or abs(word.start - fallback_word.end) <= 1):
match = self.find_language_match_for_word(fallback_word, key=key)
if not match and part not in weak_prefixes:
if not match and part not in self.weak_affixes:
match = self.create_language_match(key, LanguageWord(current_word.start, current_word.end,
'und', current_word.input_string))
elif value not in self.common_words:
else:
match = self.create_language_match(key, LanguageWord(current_word.start, current_word.end,
value, current_word.input_string))
if match:
return match
def find_language_match_for_word(self, word, key='language', force=False):
def find_language_match_for_word(self, word, key='language'): # pylint:disable=inconsistent-return-statements
"""
Return the language match for the given word.
"""
for current_word in (word.extended_word, word):
if current_word and (force or current_word.value.lower() not in self.common_words):
if current_word:
match = self.create_language_match(key, current_word)
if match:
return match
def create_language_match(self, key, word):
def create_language_match(self, key, word): # pylint:disable=inconsistent-return-statements
"""
Create a LanguageMatch for a given word
"""
@ -323,40 +342,21 @@ class LanguageFinder(object):
if lang is not None:
return _LanguageMatch(property_name=key, word=word, lang=lang)
def parse_language(self, lang_word):
def parse_language(self, lang_word): # pylint:disable=inconsistent-return-statements
"""
Parse the lang_word into a valid Language.
Multi and Undetermined languages are also valid languages.
"""
if lang_word in self.parsed:
return self.parsed[lang_word]
try:
lang = babelfish.Language.fromguessit(lang_word)
if self.allowed_languages:
if (hasattr(lang, 'name') and lang.name.lower() in self.allowed_languages) \
or (hasattr(lang, 'alpha2') and lang.alpha2.lower() in self.allowed_languages) \
or lang.alpha3.lower() in self.allowed_languages:
self.parsed[lang_word] = lang
return lang
# Keep language with alpha2 equivalent. Others are probably
# uncommon languages.
elif lang in ('mul', UNDETERMINED) or hasattr(lang, 'alpha2'):
self.parsed[lang_word] = lang
if ((hasattr(lang, 'name') and lang.name.lower() in self.allowed_languages) or
(hasattr(lang, 'alpha2') and lang.alpha2.lower() in self.allowed_languages) or
lang.alpha3.lower() in self.allowed_languages):
return lang
self.parsed[lang_word] = None
except babelfish.Error:
self.parsed[lang_word] = None
def find_languages(string, context=None):
    """Find languages in the string
    :return: list of tuple (property, Language, lang_word, word)
    """
    # NOTE(review): context.get(...) raises AttributeError when context is
    # None despite the default value -- presumably callers always pass a
    # context dict; confirm before relying on the default.
    return LanguageFinder(context.get('allowed_languages')).find(string)
pass
class SubtitlePrefixLanguageRule(Rule):
@ -367,6 +367,9 @@ class SubtitlePrefixLanguageRule(Rule):
properties = {'subtitle_language': [None]}
def enabled(self, context):
return not is_disabled(context, 'subtitle_language')
def when(self, matches, context):
to_rename = []
to_remove = matches.named('subtitle_language.prefix')
@ -387,7 +390,9 @@ class SubtitlePrefixLanguageRule(Rule):
to_remove.extend(matches.conflicting(lang))
if prefix in to_remove:
to_remove.remove(prefix)
return to_rename, to_remove
if to_rename or to_remove:
return to_rename, to_remove
return False
def then(self, matches, when_response, context):
to_rename, to_remove = when_response
@ -412,6 +417,9 @@ class SubtitleSuffixLanguageRule(Rule):
properties = {'subtitle_language': [None]}
def enabled(self, context):
return not is_disabled(context, 'subtitle_language')
def when(self, matches, context):
to_append = []
to_remove = matches.named('subtitle_language.suffix')
@ -421,7 +429,9 @@ class SubtitleSuffixLanguageRule(Rule):
to_append.append(lang)
if suffix in to_remove:
to_remove.remove(suffix)
return to_append, to_remove
if to_append or to_remove:
return to_append, to_remove
return False
def then(self, matches, when_response, context):
to_rename, to_remove = when_response
@ -436,17 +446,65 @@ class SubtitleExtensionRule(Rule):
"""
Convert language guess as subtitle_language if next match is a subtitle extension.
Since it's a strong match, it also removes any conflicting format with it.
Since it's a strong match, it also removes any conflicting source with it.
"""
consequence = [RemoveMatch, RenameMatch('subtitle_language')]
properties = {'subtitle_language': [None]}
def when(self, matches, context):
def enabled(self, context):
return not is_disabled(context, 'subtitle_language')
def when(self, matches, context): # pylint:disable=inconsistent-return-statements
subtitle_extension = matches.named('container',
lambda match: 'extension' in match.tags and 'subtitle' in match.tags,
0)
if subtitle_extension:
subtitle_lang = matches.previous(subtitle_extension, lambda match: match.name == 'language', 0)
if subtitle_lang:
return matches.conflicting(subtitle_lang, lambda m: m.name == 'format'), subtitle_lang
for weak in matches.named('subtitle_language', predicate=lambda m: 'weak-language' in m.tags):
weak.private = True
return matches.conflicting(subtitle_lang, lambda m: m.name == 'source'), subtitle_lang
class RemoveLanguage(Rule):
    """Remove language matches that were not converted to subtitle_language when language is disabled."""
    consequence = RemoveMatch
    def enabled(self, context):
        # Rule only runs when the 'language' property is disabled by configuration.
        return is_disabled(context, 'language')
    def when(self, matches, context):
        # Every remaining plain 'language' match gets removed.
        return matches.named('language')
class RemoveInvalidLanguages(Rule):
    """Remove language matches that match blacklisted common words."""
    consequence = RemoveMatch
    priority = 32

    def __init__(self, common_words):
        """Constructor."""
        super(RemoveInvalidLanguages, self).__init__()
        # Blacklist of ordinary words that frequently false-match a language.
        self.common_words = common_words

    def when(self, matches, context):
        invalid = []
        candidates = matches.range(0, len(matches.input_string),
                                   predicate=lambda m: m.name in ('language', 'subtitle_language'))
        for candidate in candidates:
            # Only matches whose raw text is a blacklisted common word are suspect.
            if candidate.raw.lower() not in self.common_words:
                continue
            group = matches.markers.at_match(candidate, index=0,
                                             predicate=lambda m: m.name == 'group')
            if group:
                # Keep the match when its group contains nothing but
                # language-type matches and separator characters.
                others = matches.range(group.start, group.end,
                                       predicate=lambda m: m.name not in ('language', 'subtitle_language'))
                holes = matches.holes(group.start, group.end,
                                      predicate=lambda m: m.value.strip(seps))
                if not others and not holes:
                    continue
            invalid.append(candidate)
        return invalid

View file

@ -8,16 +8,23 @@ import mimetypes
from rebulk import Rebulk, CustomRule, POST_PROCESS
from rebulk.match import Match
from ..common.pattern import is_disabled
from ...rules.processors import Processors
def mimetype():
def mimetype(config): # pylint:disable=unused-argument
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
return Rebulk().rules(Mimetype)
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'mimetype'))
rebulk.rules(Mimetype)
return rebulk
class Mimetype(CustomRule):

View file

@ -5,38 +5,55 @@ other property
"""
import copy
from rebulk import Rebulk, Rule, RemoveMatch, POST_PROCESS, AppendMatch
from rebulk import Rebulk, Rule, RemoveMatch, RenameMatch, POST_PROCESS, AppendMatch
from rebulk.remodule import re
from ..common import dash
from ..common import seps
from ..common.validators import seps_after, seps_before, seps_surround, compose
from ..common.pattern import is_disabled
from ..common.validators import seps_after, seps_before, seps_surround, and_
from ...reutils import build_or_pattern
from ...rules.common.formatters import raw_cleanup
def other():
def other(config): # pylint:disable=unused-argument,too-many-statements
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'other'))
rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
rebulk.defaults(name="other", validator=seps_surround)
rebulk.regex('Audio-?Fix', 'Audio-?Fixed', value='AudioFix')
rebulk.regex('Sync-?Fix', 'Sync-?Fixed', value='SyncFix')
rebulk.regex('Dual', 'Dual-?Audio', value='DualAudio')
rebulk.regex('ws', 'wide-?screen', value='WideScreen')
rebulk.regex('Re-?Enc(?:oded)?', value='ReEncoded')
rebulk.regex('Audio-?Fix', 'Audio-?Fixed', value='Audio Fixed')
rebulk.regex('Sync-?Fix', 'Sync-?Fixed', value='Sync Fixed')
rebulk.regex('Dual', 'Dual-?Audio', value='Dual Audio')
rebulk.regex('ws', 'wide-?screen', value='Widescreen')
rebulk.regex('Re-?Enc(?:oded)?', value='Reencoded')
rebulk.string('Real', 'Fix', 'Fixed', value='Proper', tags=['has-neighbor-before', 'has-neighbor-after'])
rebulk.string('Proper', 'Repack', 'Rerip', 'Dirfix', 'Nfofix', 'Prooffix', value='Proper',
rebulk.string('Repack', 'Rerip', value='Proper',
tags=['streaming_service.prefix', 'streaming_service.suffix'])
rebulk.regex('(?:Proof-?)?Sample-?Fix', value='Proper',
rebulk.string('Proper', value='Proper',
tags=['has-neighbor', 'streaming_service.prefix', 'streaming_service.suffix'])
rebulk.regex('Real-Proper', 'Real-Repack', 'Real-Rerip', value='Proper',
tags=['streaming_service.prefix', 'streaming_service.suffix', 'real'])
rebulk.regex('Real', value='Proper',
tags=['has-neighbor', 'streaming_service.prefix', 'streaming_service.suffix', 'real'])
rebulk.string('Fix', 'Fixed', value='Fix', tags=['has-neighbor-before', 'has-neighbor-after',
'streaming_service.prefix', 'streaming_service.suffix'])
rebulk.string('Dirfix', 'Nfofix', 'Prooffix', value='Fix',
tags=['streaming_service.prefix', 'streaming_service.suffix'])
rebulk.regex('(?:Proof-?)?Sample-?Fix', value='Fix',
tags=['streaming_service.prefix', 'streaming_service.suffix'])
rebulk.string('Fansub', value='Fansub', tags='has-neighbor')
rebulk.string('Fastsub', value='Fastsub', tags='has-neighbor')
rebulk.string('Fansub', value='Fan Subtitled', tags='has-neighbor')
rebulk.string('Fastsub', value='Fast Subtitled', tags='has-neighbor')
season_words = build_or_pattern(["seasons?", "series?"])
complete_articles = build_or_pattern(["The"])
@ -60,30 +77,42 @@ def other():
private_names=['completeArticle', 'completeWordsBefore', 'completeWordsAfter'],
value={'other': 'Complete'},
tags=['release-group-prefix'],
validator={'__parent__': compose(seps_surround, validate_complete)})
rebulk.string('R5', 'RC', value='R5')
validator={'__parent__': and_(seps_surround, validate_complete)})
rebulk.string('R5', value='Region 5')
rebulk.string('RC', value='Region C')
rebulk.regex('Pre-?Air', value='Preair')
rebulk.regex('(?:PS-?)?Vita', value='PS Vita')
rebulk.regex('(?:PS-?)Vita', value='PS Vita')
rebulk.regex('Vita', value='PS Vita', tags='has-neighbor')
rebulk.regex('(HD)(?P<another>Rip)', value={'other': 'HD', 'another': 'Rip'},
private_parent=True, children=True, validator={'__parent__': seps_surround}, validate_all=True)
for value in (
'Screener', 'Remux', '3D', 'mHD', 'HDLight', 'HQ', 'DDC', 'HR', 'PAL', 'SECAM', 'NTSC',
'CC', 'LD', 'MD', 'XXX'):
for value in ('Screener', 'Remux', 'PAL', 'SECAM', 'NTSC', 'XXX'):
rebulk.string(value, value=value)
rebulk.string('3D', value='3D', tags='has-neighbor')
rebulk.string('LDTV', value='LD')
rebulk.string('HQ', value='High Quality', tags='uhdbluray-neighbor')
rebulk.string('HR', value='High Resolution')
rebulk.string('LD', value='Line Dubbed')
rebulk.string('MD', value='Mic Dubbed')
rebulk.string('mHD', 'HDLight', value='Micro HD')
rebulk.string('LDTV', value='Low Definition')
rebulk.string('HFR', value='High Frame Rate')
rebulk.string('VFR', value='Variable Frame Rate')
rebulk.string('HD', value='HD', validator=None,
tags=['streaming_service.prefix', 'streaming_service.suffix'])
rebulk.regex('Full-?HD', 'FHD', value='FullHD', validator=None,
rebulk.regex('Full-?HD', 'FHD', value='Full HD', validator=None,
tags=['streaming_service.prefix', 'streaming_service.suffix'])
rebulk.regex('Ultra-?(?:HD)?', 'UHD', value='UltraHD', validator=None,
rebulk.regex('Ultra-?(?:HD)?', 'UHD', value='Ultra HD', validator=None,
tags=['streaming_service.prefix', 'streaming_service.suffix'])
rebulk.regex('Upscaled?', value='Upscaled')
for value in ('Complete', 'Classic', 'LiNE', 'Bonus', 'Trailer', 'FINAL', 'Retail',
for value in ('Complete', 'Classic', 'Bonus', 'Trailer', 'Retail',
'Colorized', 'Internal'):
rebulk.string(value, value=value, tags=['has-neighbor', 'release-group-prefix'])
rebulk.regex('LiNE', value='Line Audio', tags=['has-neighbor-before', 'has-neighbor-after', 'release-group-prefix'])
rebulk.regex('Read-?NFO', value='Read NFO')
rebulk.string('CONVERT', value='Converted', tags='has-neighbor')
rebulk.string('DOCU', value='Documentary', tags='has-neighbor')
rebulk.string('DOCU', 'DOKU', value='Documentary', tags='has-neighbor')
rebulk.string('OM', value='Open Matte', tags='has-neighbor')
rebulk.string('STV', value='Straight to Video', tags='has-neighbor')
rebulk.string('OAR', value='Original Aspect Ratio', tags='has-neighbor')
@ -92,16 +121,30 @@ def other():
for coast in ('East', 'West'):
rebulk.regex(r'(?:Live-)?(?:Episode-)?' + coast + '-?(?:Coast-)?Feed', value=coast + ' Coast Feed')
rebulk.string('VO', 'OV', value='OV', tags='has-neighbor')
rebulk.string('VO', 'OV', value='Original Video', tags='has-neighbor')
rebulk.string('Ova', 'Oav', value='Original Animated Video')
rebulk.regex('Scr(?:eener)?', value='Screener', validator=None,
tags=['other.validate.screener', 'format-prefix', 'format-suffix'])
tags=['other.validate.screener', 'source-prefix', 'source-suffix'])
rebulk.string('Mux', value='Mux', validator=seps_after,
tags=['other.validate.mux', 'video-codec-prefix', 'format-suffix'])
rebulk.string('HC', value='Hardcoded Subtitles')
tags=['other.validate.mux', 'video-codec-prefix', 'source-suffix'])
rebulk.string('HC', 'vost', value='Hardcoded Subtitles')
rebulk.rules(ValidateHasNeighbor, ValidateHasNeighborAfter, ValidateHasNeighborBefore, ValidateScreenerRule,
ValidateMuxRule, ValidateHardcodedSubs, ValidateStreamingServiceNeighbor, ProperCountRule)
rebulk.string('SDR', value='Standard Dynamic Range', tags='uhdbluray-neighbor')
rebulk.regex('HDR(?:10)?', value='HDR10', tags='uhdbluray-neighbor')
rebulk.regex('Dolby-?Vision', value='Dolby Vision', tags='uhdbluray-neighbor')
rebulk.regex('BT-?2020', value='BT.2020', tags='uhdbluray-neighbor')
rebulk.string('Sample', value='Sample', tags=['at-end', 'not-a-release-group'])
rebulk.string('Extras', value='Extras', tags='has-neighbor')
rebulk.regex('Digital-?Extras?', value='Extras')
rebulk.string('Proof', value='Proof', tags=['at-end', 'not-a-release-group'])
rebulk.string('Obfuscated', 'Scrambled', value='Obfuscated', tags=['at-end', 'not-a-release-group'])
rebulk.string('xpost', 'postbot', 'asrequested', value='Repost', tags='not-a-release-group')
rebulk.rules(RenameAnotherToOther, ValidateHasNeighbor, ValidateHasNeighborAfter, ValidateHasNeighborBefore,
ValidateScreenerRule, ValidateMuxRule, ValidateHardcodedSubs, ValidateStreamingServiceNeighbor,
ValidateAtEnd, ValidateReal, ProperCountRule)
return rebulk
@ -116,7 +159,7 @@ class ProperCountRule(Rule):
properties = {'proper_count': [None]}
def when(self, matches, context):
def when(self, matches, context): # pylint:disable=inconsistent-return-statements
propers = matches.named('other', lambda match: match.value == 'Proper')
if propers:
raws = {} # Count distinct raw values
@ -124,15 +167,32 @@ class ProperCountRule(Rule):
raws[raw_cleanup(proper.raw)] = proper
proper_count_match = copy.copy(propers[-1])
proper_count_match.name = 'proper_count'
proper_count_match.value = len(raws)
value = 0
for raw in raws.values():
value += 2 if 'real' in raw.tags else 1
proper_count_match.value = value
return proper_count_match
class RenameAnotherToOther(Rule):
    """
    Rename `another` properties to `other`.

    Some source patterns capture a secondary group named `another` (e.g. the
    'Reencoded'/'Rip' parts of patterns such as 'BRRip' or 'WEBCap') so they can
    coexist with an `other` capture in the same pattern; this rule folds those
    matches back into the public `other` property.
    """
    # Run before lower-priority validation rules so they see the final name.
    priority = 32
    consequence = RenameMatch('other')

    def when(self, matches, context):
        # Trigger whenever any `another` match exists; RenameMatch('other')
        # performs the actual renaming on the returned matches.
        return matches.named('another')
class ValidateHasNeighbor(Rule):
"""
Validate tag has-neighbor
"""
consequence = RemoveMatch
priority = 64
def when(self, matches, context):
ret = []
@ -158,6 +218,7 @@ class ValidateHasNeighborBefore(Rule):
Validate tag has-neighbor-before that previous match exists.
"""
consequence = RemoveMatch
priority = 64
def when(self, matches, context):
ret = []
@ -177,6 +238,7 @@ class ValidateHasNeighborAfter(Rule):
Validate tag has-neighbor-after that next match exists.
"""
consequence = RemoveMatch
priority = 64
def when(self, matches, context):
ret = []
@ -201,8 +263,8 @@ class ValidateScreenerRule(Rule):
def when(self, matches, context):
ret = []
for screener in matches.named('other', lambda match: 'other.validate.screener' in match.tags):
format_match = matches.previous(screener, lambda match: match.name == 'format', 0)
if not format_match or matches.input_string[format_match.end:screener.start].strip(seps):
source_match = matches.previous(screener, lambda match: match.initiator.name == 'source', 0)
if not source_match or matches.input_string[source_match.end:screener.start].strip(seps):
ret.append(screener)
return ret
@ -217,8 +279,8 @@ class ValidateMuxRule(Rule):
def when(self, matches, context):
ret = []
for mux in matches.named('other', lambda match: 'other.validate.mux' in match.tags):
format_match = matches.previous(mux, lambda match: match.name == 'format', 0)
if not format_match:
source_match = matches.previous(mux, lambda match: match.initiator.name == 'source', 0)
if not source_match:
ret.append(mux)
return ret
@ -257,16 +319,18 @@ class ValidateStreamingServiceNeighbor(Rule):
def when(self, matches, context):
to_remove = []
for match in matches.named('other',
predicate=lambda m: ('streaming_service.prefix' in m.tags or
'streaming_service.suffix' in m.tags)):
predicate=lambda m: (m.initiator.name != 'source'
and ('streaming_service.prefix' in m.tags
or 'streaming_service.suffix' in m.tags))):
match = match.initiator
if not seps_after(match):
if 'streaming_service.prefix' in match.tags:
next_match = matches.next(match, lambda m: m.name == 'streaming_service', 0)
if next_match and not matches.holes(match.end, next_match.start,
predicate=lambda m: m.value.strip(seps)):
continue
if match.children:
to_remove.extend(match.children)
to_remove.append(match)
elif not seps_before(match):
@ -276,6 +340,44 @@ class ValidateStreamingServiceNeighbor(Rule):
predicate=lambda m: m.value.strip(seps)):
continue
if match.children:
to_remove.extend(match.children)
to_remove.append(match)
return to_remove
class ValidateAtEnd(Rule):
    """
    Validate `other` matches tagged 'at-end' (e.g. Sample, Proof, Obfuscated).

    Such a match is only kept when it effectively terminates its filepart:
    nothing but other/container matches may follow it, and no unmatched text
    (hole) may sit between it and the end of the filepart.
    """
    priority = 32
    consequence = RemoveMatch

    def when(self, matches, context):
        to_remove = []
        for filepart in matches.markers.named('path'):
            for match in matches.range(filepart.start, filepart.end,
                                       predicate=lambda m: m.name == 'other' and 'at-end' in m.tags):
                # Invalid if a non-empty hole, or any match other than
                # other/container, occurs after this match in the filepart.
                if (matches.holes(match.end, filepart.end, predicate=lambda m: m.value.strip(seps)) or
                        matches.range(match.end, filepart.end, predicate=lambda m: m.name not in (
                            'other', 'container'))):
                    to_remove.append(match)
        return to_remove
class ValidateReal(Rule):
    """
    Validate `other` matches carrying the 'real' tag.

    A 'real'-tagged match is only kept when at least one other match precedes
    it inside the same filepart; one sitting at the very start of a filepart
    is removed.
    """
    consequence = RemoveMatch
    priority = 64

    def when(self, matches, context):
        def tagged_real(match):
            # 'other' matches produced with the 'real' tag.
            return match.name == 'other' and 'real' in match.tags

        return [candidate
                for filepart in matches.markers.named('path')
                for candidate in matches.range(filepart.start, filepart.end, tagged_real)
                if not matches.range(filepart.start, candidate.start)]

View file

@ -7,20 +7,25 @@ from rebulk.remodule import re
from rebulk import Rebulk
from ..common import dash
from ..common.validators import seps_surround, int_coercable, compose
from ..common.pattern import is_disabled
from ..common.validators import seps_surround, int_coercable, and_
from ..common.numeral import numeral, parse_numeral
from ...reutils import build_or_pattern
def part():
def part(config): # pylint:disable=unused-argument
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash], validator={'__parent__': seps_surround})
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'part'))
rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash], validator={'__parent__': seps_surround})
prefixes = ['pt', 'part']
prefixes = config['prefixes']
def validate_roman(match):
"""
@ -36,6 +41,6 @@ def part():
rebulk.regex(build_or_pattern(prefixes) + r'-?(?P<part>' + numeral + r')',
prefixes=prefixes, validate_all=True, private_parent=True, children=True, formatter=parse_numeral,
validator={'part': compose(validate_roman, lambda m: 0 < m.value < 100)})
validator={'part': and_(validate_roman, lambda m: 0 < m.value < 100)})
return rebulk

View file

@ -6,22 +6,53 @@ release_group property
import copy
from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch
from rebulk.match import Match
from ..common import seps
from ..common.expected import build_expected_function
from ..common.comparators import marker_sorted
from ..common.expected import build_expected_function
from ..common.formatters import cleanup
from ..common.pattern import is_disabled
from ..common.validators import int_coercable, seps_surround
from ..properties.title import TitleFromPosition
def release_group():
def release_group(config):
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk()
forbidden_groupnames = config['forbidden_names']
groupname_ignore_seps = config['ignored_seps']
groupname_seps = ''.join([c for c in seps if c not in groupname_ignore_seps])
def clean_groupname(string):
"""
Removes and strip separators from input_string
:param string:
:type string:
:return:
:rtype:
"""
string = string.strip(groupname_seps)
if not (string.endswith(tuple(groupname_ignore_seps)) and string.startswith(tuple(groupname_ignore_seps))) \
and not any(i in string.strip(groupname_ignore_seps) for i in groupname_ignore_seps):
string = string.strip(groupname_ignore_seps)
for forbidden in forbidden_groupnames:
if string.lower().startswith(forbidden) and string[len(forbidden):len(forbidden) + 1] in seps:
string = string[len(forbidden):]
string = string.strip(groupname_seps)
if string.lower().endswith(forbidden) and string[-len(forbidden) - 1:-len(forbidden)] in seps:
string = string[:len(forbidden)]
string = string.strip(groupname_seps)
return string.strip()
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'release_group'))
expected_group = build_expected_function('expected_group')
@ -30,42 +61,142 @@ def release_group():
conflict_solver=lambda match, other: other,
disabled=lambda context: not context.get('expected_group'))
return rebulk.rules(SceneReleaseGroup, AnimeReleaseGroup)
return rebulk.rules(
DashSeparatedReleaseGroup(clean_groupname),
SceneReleaseGroup(clean_groupname),
AnimeReleaseGroup
)
forbidden_groupnames = ['rip', 'by', 'for', 'par', 'pour', 'bonus']
groupname_ignore_seps = '[]{}()'
groupname_seps = ''.join([c for c in seps if c not in groupname_ignore_seps])
def clean_groupname(string):
"""
Removes and strip separators from input_string
:param string:
:type string:
:return:
:rtype:
"""
string = string.strip(groupname_seps)
if not (string.endswith(tuple(groupname_ignore_seps)) and string.startswith(tuple(groupname_ignore_seps))) \
and not any(i in string.strip(groupname_ignore_seps) for i in groupname_ignore_seps):
string = string.strip(groupname_ignore_seps)
for forbidden in forbidden_groupnames:
if string.lower().startswith(forbidden) and string[len(forbidden):len(forbidden)+1] in seps:
string = string[len(forbidden):]
string = string.strip(groupname_seps)
if string.lower().endswith(forbidden) and string[-len(forbidden)-1:-len(forbidden)] in seps:
string = string[:len(forbidden)]
string = string.strip(groupname_seps)
return string
_scene_previous_names = ['video_codec', 'format', 'video_api', 'audio_codec', 'audio_profile', 'video_profile',
_scene_previous_names = ('video_codec', 'source', 'video_api', 'audio_codec', 'audio_profile', 'video_profile',
'audio_channels', 'screen_size', 'other', 'container', 'language', 'subtitle_language',
'subtitle_language.suffix', 'subtitle_language.prefix', 'language.suffix']
'subtitle_language.suffix', 'subtitle_language.prefix', 'language.suffix')
_scene_previous_tags = ['release-group-prefix']
_scene_previous_tags = ('release-group-prefix',)
_scene_no_previous_tags = ('no-release-group-prefix',)
class DashSeparatedReleaseGroup(Rule):
    """
    Detect dash separated release groups that might appear at the end or at the beginning of a release name.

    Series.S01E02.Pilot.DVDRip.x264-CS.mkv
    release_group: CS

    abc-the.title.name.1983.1080p.bluray.x264.mkv
    release_group: abc

    At the end: Release groups should be dash-separated and shouldn't contain spaces nor
    appear in a group with other matches. The preceding matches should be separated by dot.
    If a release group is found, the conflicting matches are removed.

    At the beginning: Release groups should be dash-separated and shouldn't contain spaces nor appear in a group.
    It should be followed by a hole with dot-separated words.
    Detection only happens if no matches exist at the beginning.
    """
    consequence = [RemoveMatch, AppendMatch]

    def __init__(self, value_formatter):
        """Default constructor.

        :param value_formatter: callable applied to the raw candidate text to
            produce the final release_group value.
        """
        super(DashSeparatedReleaseGroup, self).__init__()
        self.value_formatter = value_formatter

    @classmethod
    def is_valid(cls, matches, candidate, start, end, at_end):  # pylint:disable=inconsistent-return-statements
        """
        Whether a candidate is a valid release group.

        Beginning-of-filepart candidates (``at_end`` false) must be longer than
        one character, not be inside a bracketed group, and be immediately
        followed by a hole that starts with '-' and contains dot-separated
        words with no further dashes or spaces.

        End-of-filepart candidates walk backwards through preceding matches:
        the separator right before the candidate must be '-', and an earlier
        '.'-separator confirms the dot-separated release name shape.
        """
        if not at_end:
            if len(candidate.value) <= 1:
                return False

            if matches.markers.at_match(candidate, predicate=lambda m: m.name == 'group'):
                return False

            first_hole = matches.holes(candidate.end, end, predicate=lambda m: m.start == candidate.end, index=0)
            if not first_hole:
                return False

            raw_value = first_hole.raw
            return raw_value[0] == '-' and '-' not in raw_value[1:] and '.' in raw_value and ' ' not in raw_value

        # A bracketed group is acceptable only if the candidate is the sole
        # non-private match inside it.
        group = matches.markers.at_match(candidate, predicate=lambda m: m.name == 'group', index=0)
        if group and matches.at_match(group, predicate=lambda m: not m.private and m.span != candidate.span):
            return False

        count = 0
        match = candidate
        while match:
            # Previous non-private, non-expected match before the current one.
            current = matches.range(start,
                                    match.start,
                                    index=-1,
                                    predicate=lambda m: not m.private and not 'expected' in m.tags)

            if not current:
                break

            separator = match.input_string[current.end:match.start]
            # A match whose raw text begins with '-' supplies its own separator.
            if not separator and match.raw[0] == '-':
                separator = '-'

            match = current

            if count == 0:
                # First hop: the candidate itself must be dash-separated.
                if separator != '-':
                    break

                count += 1
                continue

            # Subsequent hop separated by '.' confirms a dot-separated name.
            if separator == '.':
                return True

    def detect(self, matches, start, end, at_end):  # pylint:disable=inconsistent-return-statements
        """
        Detect release group at the end or at the beginning of a filepart.

        Returns the candidate match/hole when valid, otherwise None.
        """
        candidate = None
        if at_end:
            # Ignore a trailing container (file extension) match.
            container = matches.ending(end, lambda m: m.name == 'container', index=0)
            if container:
                end = container.start

            # Prefer an existing non-private match at the end, excluding values
            # explicitly tagged as not-a-release-group (e.g. Sample, Proof).
            candidate = matches.ending(end, index=0, predicate=(
                lambda m: not m.private and not (
                    m.name == 'other' and 'not-a-release-group' in m.tags
                ) and '-' not in m.raw and m.raw.strip() == m.raw))

        if not candidate:
            # Fall back to an unmatched hole at the filepart boundary.
            if at_end:
                candidate = matches.holes(start, end, seps=seps, index=-1,
                                          predicate=lambda m: m.end == end and m.raw.strip(seps) and m.raw[0] == '-')
            else:
                candidate = matches.holes(start, end, seps=seps, index=0,
                                          predicate=lambda m: m.start == start and m.raw.strip(seps))

        if candidate and self.is_valid(matches, candidate, start, end, at_end):
            return candidate

    def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
        # Another rule (e.g. expected_group) already produced a release_group.
        if matches.named('release_group'):
            return

        to_remove = []
        to_append = []
        for filepart in matches.markers.named('path'):
            # End-of-filepart detection wins; conflicting matches are removed.
            candidate = self.detect(matches, filepart.start, filepart.end, True)
            if candidate:
                to_remove.extend(matches.at_match(candidate))
            else:
                candidate = self.detect(matches, filepart.start, filepart.end, False)

            if candidate:
                releasegroup = Match(candidate.start, candidate.end, name='release_group',
                                     formatter=self.value_formatter, input_string=candidate.input_string)

                # The formatter may strip the value down to nothing; only keep
                # candidates that still have content.
                if releasegroup.value:
                    to_append.append(releasegroup)

        if to_remove or to_append:
            return to_remove, to_append
class SceneReleaseGroup(Rule):
@ -79,7 +210,23 @@ class SceneReleaseGroup(Rule):
properties = {'release_group': [None]}
def when(self, matches, context):
def __init__(self, value_formatter):
"""Default constructor."""
super(SceneReleaseGroup, self).__init__()
self.value_formatter = value_formatter
@staticmethod
def is_previous_match(match):
"""
Check if match can precede release_group
:param match:
:return:
"""
return not match.tagged(*_scene_no_previous_tags) if match.name in _scene_previous_names else \
match.tagged(*_scene_previous_tags)
def when(self, matches, context): # pylint:disable=too-many-locals
# If a release_group is found before, ignore this kind of release_group rule.
ret = []
@ -87,6 +234,8 @@ class SceneReleaseGroup(Rule):
for filepart in marker_sorted(matches.markers.named('path'), matches):
# pylint:disable=cell-var-from-loop
start, end = filepart.span
if matches.named('release_group', predicate=lambda m: m.start >= start and m.end <= end):
continue
titles = matches.named('title', predicate=lambda m: m.start >= start and m.end <= end)
@ -101,7 +250,7 @@ class SceneReleaseGroup(Rule):
"""
return match in titles[1:]
last_hole = matches.holes(start, end + 1, formatter=clean_groupname,
last_hole = matches.holes(start, end + 1, formatter=self.value_formatter,
ignore=keep_only_first_title,
predicate=lambda hole: cleanup(hole.value), index=-1)
@ -118,13 +267,12 @@ class SceneReleaseGroup(Rule):
if match.start < filepart.start:
return False
return not match.private or match.name in _scene_previous_names
return not match.private or self.is_previous_match(match)
previous_match = matches.previous(last_hole,
previous_match_filter,
index=0)
if previous_match and (previous_match.name in _scene_previous_names or
any(tag in previous_match.tags for tag in _scene_previous_tags)) and \
if previous_match and (self.is_previous_match(previous_match)) and \
not matches.input_string[previous_match.end:last_hole.start].strip(seps) \
and not int_coercable(last_hole.value.strip(seps)):
@ -134,7 +282,7 @@ class SceneReleaseGroup(Rule):
# if hole is inside a group marker with same value, remove [](){} ...
group = matches.markers.at_match(last_hole, lambda marker: marker.name == 'group', 0)
if group:
group.formatter = clean_groupname
group.formatter = self.value_formatter
if group.value == last_hole.value:
last_hole.start = group.start + 1
last_hole.end = group.end - 1
@ -165,11 +313,11 @@ class AnimeReleaseGroup(Rule):
# If a release_group is found before, ignore this kind of release_group rule.
if matches.named('release_group'):
return
return False
if not matches.named('episode') and not matches.named('season') and matches.named('release_group'):
# This doesn't seems to be an anime, and we already found another release_group.
return
return False
for filepart in marker_sorted(matches.markers.named('path'), matches):
@ -193,4 +341,7 @@ class AnimeReleaseGroup(Rule):
to_append.append(group)
to_remove.extend(matches.range(empty_group.start, empty_group.end,
lambda m: 'weak-language' in m.tags))
return to_remove, to_append
if to_remove or to_append:
return to_remove, to_append
return False

View file

@ -3,67 +3,115 @@
"""
screen_size property
"""
from rebulk.match import Match
from rebulk.remodule import re
from rebulk import Rebulk, Rule, RemoveMatch
from rebulk import Rebulk, Rule, RemoveMatch, AppendMatch
from ..common.pattern import is_disabled
from ..common.quantity import FrameRate
from ..common.validators import seps_surround
from ..common import dash, seps
from ...reutils import build_or_pattern
def screen_size():
def screen_size(config):
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
def conflict_solver(match, other):
"""
Conflict solver for most screen_size.
"""
if other.name == 'screen_size':
if 'resolution' in other.tags:
# The chtouile to solve conflict in "720 x 432" string matching both 720p pattern
int_value = _digits_re.findall(match.raw)[-1]
if other.value.startswith(int_value):
return match
return other
return '__default__'
interlaced = frozenset(config['interlaced'])
progressive = frozenset(config['progressive'])
frame_rates = [re.escape(rate) for rate in config['frame_rates']]
min_ar = config['min_ar']
max_ar = config['max_ar']
rebulk = Rebulk().string_defaults(ignore_case=True).regex_defaults(flags=re.IGNORECASE)
rebulk.defaults(name="screen_size", validator=seps_surround, conflict_solver=conflict_solver)
rebulk = Rebulk()
rebulk = rebulk.string_defaults(ignore_case=True).regex_defaults(flags=re.IGNORECASE)
rebulk.regex(r"(?:\d{3,}(?:x|\*))?360(?:i|p?x?)", value="360p")
rebulk.regex(r"(?:\d{3,}(?:x|\*))?368(?:i|p?x?)", value="368p")
rebulk.regex(r"(?:\d{3,}(?:x|\*))?480(?:i|p?x?)", value="480p")
rebulk.regex(r"(?:\d{3,}(?:x|\*))?576(?:i|p?x?)", value="576p")
rebulk.regex(r"(?:\d{3,}(?:x|\*))?720(?:i|p?(?:50|60)?x?)", value="720p")
rebulk.regex(r"(?:\d{3,}(?:x|\*))?720(?:p(?:50|60)?x?)", value="720p")
rebulk.regex(r"(?:\d{3,}(?:x|\*))?720p?hd", value="720p")
rebulk.regex(r"(?:\d{3,}(?:x|\*))?900(?:i|p?x?)", value="900p")
rebulk.regex(r"(?:\d{3,}(?:x|\*))?1080i", value="1080i")
rebulk.regex(r"(?:\d{3,}(?:x|\*))?1080p?x?", value="1080p")
rebulk.regex(r"(?:\d{3,}(?:x|\*))?1080(?:p(?:50|60)?x?)", value="1080p")
rebulk.regex(r"(?:\d{3,}(?:x|\*))?1080p?hd", value="1080p")
rebulk.regex(r"(?:\d{3,}(?:x|\*))?2160(?:i|p?x?)", value="4K")
rebulk.string('4k', value='4K')
rebulk.defaults(name='screen_size', validator=seps_surround, abbreviations=[dash],
disabled=lambda context: is_disabled(context, 'screen_size'))
_digits_re = re.compile(r'\d+')
frame_rate_pattern = build_or_pattern(frame_rates, name='frame_rate')
interlaced_pattern = build_or_pattern(interlaced, name='height')
progressive_pattern = build_or_pattern(progressive, name='height')
rebulk.defaults(name="screen_size", validator=seps_surround)
rebulk.regex(r'\d{3,}-?(?:x|\*)-?\d{3,}',
formatter=lambda value: 'x'.join(_digits_re.findall(value)),
abbreviations=[dash],
tags=['resolution'],
res_pattern = r'(?:(?P<width>\d{3,4})(?:x|\*))?'
rebulk.regex(res_pattern + interlaced_pattern + r'(?P<scan_type>i)' + frame_rate_pattern + '?')
rebulk.regex(res_pattern + progressive_pattern + r'(?P<scan_type>p)' + frame_rate_pattern + '?')
rebulk.regex(res_pattern + progressive_pattern + r'(?P<scan_type>p)?(?:hd)')
rebulk.regex(res_pattern + progressive_pattern + r'(?P<scan_type>p)?x?')
rebulk.string('4k', value='2160p')
rebulk.regex(r'(?P<width>\d{3,4})-?(?:x|\*)-?(?P<height>\d{3,4})',
conflict_solver=lambda match, other: '__default__' if other.name == 'screen_size' else other)
rebulk.rules(ScreenSizeOnlyOne, RemoveScreenSizeConflicts)
rebulk.regex(frame_rate_pattern + '(p|fps)', name='frame_rate',
formatter=FrameRate.fromstring, disabled=lambda context: is_disabled(context, 'frame_rate'))
rebulk.rules(PostProcessScreenSize(progressive, min_ar, max_ar), ScreenSizeOnlyOne, ResolveScreenSizeConflicts)
return rebulk
class PostProcessScreenSize(Rule):
    """
    Process the screen size calculating the aspect ratio if available.

    Convert to a standard notation (720p, 1080p, etc) when it's a standard resolution and
    aspect ratio is valid or not available.

    It also creates an aspect_ratio match when available.
    """
    consequence = AppendMatch

    def __init__(self, standard_heights, min_ar, max_ar):
        # standard_heights: heights (as captured strings) that map to the
        #   standard '<height><scan_type>' notation.
        # min_ar / max_ar: accepted aspect-ratio bounds for that mapping.
        super(PostProcessScreenSize, self).__init__()
        self.standard_heights = standard_heights
        self.min_ar = min_ar
        self.max_ar = max_ar

    def when(self, matches, context):
        to_append = []
        for match in matches.named('screen_size'):
            # Promote captured frame_rate children to standalone matches,
            # unless the frame_rate property is disabled in the context.
            if not is_disabled(context, 'frame_rate'):
                for frame_rate in match.children.named('frame_rate'):
                    frame_rate.formatter = FrameRate.fromstring
                    to_append.append(frame_rate)

            values = match.children.to_dict()
            if 'height' not in values:
                continue

            # Default scan type is progressive ('p') when not captured.
            scan_type = (values.get('scan_type') or 'p').lower()
            height = values['height']
            if 'width' not in values:
                # Height-only capture: normalize to e.g. '1080p'.
                match.value = '{0}{1}'.format(height, scan_type)
                continue

            width = values['width']
            calculated_ar = float(width) / float(height)

            aspect_ratio = Match(match.start, match.end, input_string=match.input_string,
                                 name='aspect_ratio', value=round(calculated_ar, 3))

            if not is_disabled(context, 'aspect_ratio'):
                to_append.append(aspect_ratio)

            # Use standard notation only for a standard height whose computed
            # aspect ratio falls inside the configured bounds; otherwise keep
            # the explicit 'WxH' form.
            if height in self.standard_heights and self.min_ar < calculated_ar < self.max_ar:
                match.value = '{0}{1}'.format(height, scan_type)
            else:
                match.value = '{0}x{1}'.format(width, height)

        return to_append
class ScreenSizeOnlyOne(Rule):
"""
Keep a single screen_size pet filepath part.
Keep a single screen_size per filepath part.
"""
consequence = RemoveMatch
@ -72,15 +120,15 @@ class ScreenSizeOnlyOne(Rule):
for filepart in matches.markers.named('path'):
screensize = list(reversed(matches.range(filepart.start, filepart.end,
lambda match: match.name == 'screen_size')))
if len(screensize) > 1:
if len(screensize) > 1 and len(set((match.value for match in screensize))) > 1:
to_remove.extend(screensize[1:])
return to_remove
class RemoveScreenSizeConflicts(Rule):
class ResolveScreenSizeConflicts(Rule):
"""
Remove season and episode matches which conflicts with screen_size match.
Resolve screen_size conflicts with season and episode matches.
"""
consequence = RemoveMatch
@ -95,14 +143,21 @@ class RemoveScreenSizeConflicts(Rule):
if not conflicts:
continue
has_neighbor = False
video_profile = matches.range(screensize.end, filepart.end, lambda match: match.name == 'video_profile', 0)
if video_profile and not matches.holes(screensize.end, video_profile.start,
predicate=lambda h: h.value and h.value.strip(seps)):
to_remove.extend(conflicts)
has_neighbor = True
date = matches.previous(screensize, lambda match: match.name == 'date', 0)
if date and not matches.holes(date.end, screensize.start,
predicate=lambda h: h.value and h.value.strip(seps)):
previous = matches.previous(screensize, index=0, predicate=(
lambda m: m.name in ('date', 'source', 'other', 'streaming_service')))
if previous and not matches.holes(previous.end, screensize.start,
predicate=lambda h: h.value and h.value.strip(seps)):
to_remove.extend(conflicts)
has_neighbor = True
if not has_neighbor:
to_remove.append(screensize)
return to_remove

View file

@ -7,23 +7,24 @@ import re
from rebulk import Rebulk
from ..common.validators import seps_surround
from ..common import dash
from ..common.quantity import Size
from ..common.pattern import is_disabled
from ..common.validators import seps_surround
def size():
def size(config): # pylint:disable=unused-argument
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
def format_size(value):
"""Format size using uppercase and no space."""
return re.sub(r'(?<=\d)[.](?=[^\d])', '', value.upper())
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'size'))
rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
rebulk.defaults(name='size', validator=seps_surround)
rebulk.regex(r'\d+\.?[mgt]b', r'\d+\.\d+[mgt]b', formatter=format_size, tags=['release-group-prefix'])
rebulk.regex(r'\d+-?[mgt]b', r'\d+\.\d+-?[mgt]b', formatter=Size.fromstring, tags=['release-group-prefix'])
return rebulk

View file

@ -0,0 +1,235 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
source property
"""
import copy
from rebulk.remodule import re
from rebulk import AppendMatch, Rebulk, RemoveMatch, Rule
from .audio_codec import HqConflictRule
from ..common import dash, seps
from ..common.pattern import is_disabled
from ..common.validators import seps_before, seps_after, or_
def source(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'source'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash], private_parent=True, children=True)
    rebulk = rebulk.defaults(name='source',
                             tags=['video-codec-prefix', 'streaming_service.suffix'],
                             validate_all=True,
                             validator={'__parent__': or_(seps_before, seps_after)})

    # Optional/required 'Rip' affixes captured as a secondary 'other' value.
    rip_prefix = '(?P<other>Rip)-?'
    rip_suffix = '-?(?P<other>Rip)'
    rip_optional_suffix = '(?:' + rip_suffix + ')?'

    def build_source_pattern(*patterns, **kwargs):
        """Helper pattern to build source pattern."""
        prefix_format = kwargs.get('prefix') or ''
        suffix_format = kwargs.get('suffix') or ''

        string_format = prefix_format + '({0})' + suffix_format
        return [string_format.format(pattern) for pattern in patterns]

    def demote_other(match, other):  # pylint: disable=unused-argument
        """Default conflict solver with 'other' property."""
        return other if other.name == 'other' or other.name == 'release_group' else '__default__'

    # Tape / camera / theatrical captures.
    rebulk.regex(*build_source_pattern('VHS', suffix=rip_optional_suffix),
                 value={'source': 'VHS', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('CAM', suffix=rip_optional_suffix),
                 value={'source': 'Camera', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('HD-?CAM', suffix=rip_optional_suffix),
                 value={'source': 'HD Camera', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('TELESYNC', 'TS', suffix=rip_optional_suffix),
                 value={'source': 'Telesync', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('HD-?TELESYNC', 'HD-?TS', suffix=rip_optional_suffix),
                 value={'source': 'HD Telesync', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('WORKPRINT', 'WP'), value='Workprint')
    rebulk.regex(*build_source_pattern('TELECINE', 'TC', suffix=rip_optional_suffix),
                 value={'source': 'Telecine', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('HD-?TELECINE', 'HD-?TC', suffix=rip_optional_suffix),
                 value={'source': 'HD Telecine', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('PPV', suffix=rip_optional_suffix),
                 value={'source': 'Pay-per-view', 'other': 'Rip'})
    # TV sources.
    rebulk.regex(*build_source_pattern('SD-?TV', suffix=rip_optional_suffix),
                 value={'source': 'TV', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('TV', suffix=rip_suffix),  # TV is too common to allow matching
                 value={'source': 'TV', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('TV', 'SD-?TV', prefix=rip_prefix),
                 value={'source': 'TV', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('TV-?(?=Dub)'), value='TV')
    rebulk.regex(*build_source_pattern('DVB', 'PD-?TV', suffix=rip_optional_suffix),
                 value={'source': 'Digital TV', 'other': 'Rip'})
    # Disc sources.
    rebulk.regex(*build_source_pattern('DVD', suffix=rip_optional_suffix),
                 value={'source': 'DVD', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('DM', suffix=rip_optional_suffix),
                 value={'source': 'Digital Master', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('VIDEO-?TS', 'DVD-?R(?:$|(?!E))',  # 'DVD-?R(?:$|^E)' => DVD-Real ...
                                       'DVD-?9', 'DVD-?5'), value='DVD')
    rebulk.regex(*build_source_pattern('HD-?TV', suffix=rip_optional_suffix), conflict_solver=demote_other,
                 value={'source': 'HDTV', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('TV-?HD', suffix=rip_suffix), conflict_solver=demote_other,
                 value={'source': 'HDTV', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('TV', suffix='-?(?P<other>Rip-?HD)'), conflict_solver=demote_other,
                 value={'source': 'HDTV', 'other': 'Rip'})
    # Web sources.
    rebulk.regex(*build_source_pattern('VOD', suffix=rip_optional_suffix),
                 value={'source': 'Video on Demand', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('WEB', 'WEB-?DL', suffix=rip_suffix),
                 value={'source': 'Web', 'other': 'Rip'})
    # WEBCap is a synonym to WEBRip, mostly used by non english
    rebulk.regex(*build_source_pattern('WEB-?(?P<another>Cap)', suffix=rip_optional_suffix),
                 value={'source': 'Web', 'other': 'Rip', 'another': 'Rip'})
    rebulk.regex(*build_source_pattern('WEB-?DL', 'WEB-?U?HD', 'DL-?WEB', 'DL(?=-?Mux)'),
                 value={'source': 'Web'})
    # Bare 'WEB' is ambiguous: tagged weak so a later rule can validate it.
    rebulk.regex('(WEB)', value='Web', tags='weak.source')
    rebulk.regex(*build_source_pattern('HD-?DVD', suffix=rip_optional_suffix),
                 value={'source': 'HD-DVD', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('Blu-?ray', 'BD', 'BD[59]', 'BD25', 'BD50', suffix=rip_optional_suffix),
                 value={'source': 'Blu-ray', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('(?P<another>BR)-?(?=Scr(?:eener)?)', '(?P<another>BR)-?(?=Mux)'),  # BRRip
                 value={'source': 'Blu-ray', 'another': 'Reencoded'})
    rebulk.regex(*build_source_pattern('(?P<another>BR)', suffix=rip_suffix),  # BRRip
                 value={'source': 'Blu-ray', 'other': 'Rip', 'another': 'Reencoded'})
    rebulk.regex(*build_source_pattern('Ultra-?Blu-?ray', 'Blu-?ray-?Ultra'), value='Ultra HD Blu-ray')
    rebulk.regex(*build_source_pattern('AHDTV'), value='Analog HDTV')
    rebulk.regex(*build_source_pattern('UHD-?TV', suffix=rip_optional_suffix), conflict_solver=demote_other,
                 value={'source': 'Ultra HDTV', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('UHD', suffix=rip_suffix), conflict_solver=demote_other,
                 value={'source': 'Ultra HDTV', 'other': 'Rip'})
    # Satellite sources.
    rebulk.regex(*build_source_pattern('DSR', 'DTH', suffix=rip_optional_suffix),
                 value={'source': 'Satellite', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('DSR?', 'SAT', suffix=rip_suffix),
                 value={'source': 'Satellite', 'other': 'Rip'})

    rebulk.rules(ValidateSourcePrefixSuffix, ValidateWeakSource, UltraHdBlurayRule)

    return rebulk
class UltraHdBlurayRule(Rule):
    """
    Replace other:Ultra HD and source:Blu-ray with source:Ultra HD Blu-ray
    """
    # Declared rebulk dependency: HqConflictRule is executed before this rule.
    dependency = HqConflictRule
    consequence = [RemoveMatch, AppendMatch]
    @classmethod
    def find_ultrahd(cls, matches, start, end, index):
        """Find Ultra HD match."""
        # index selects the candidate within [start, end): -1 takes the last
        # (closest before the Blu-ray match), 0 takes the first (closest after).
        return matches.range(start, end, index=index, predicate=(
            lambda m: not m.private and m.name == 'other' and m.value == 'Ultra HD'
        ))
    @classmethod
    def validate_range(cls, matches, start, end):
        """Validate no holes or invalid matches exist in the specified range."""
        # The span between 'Ultra HD' and 'Blu-ray' must contain no unmatched
        # text (holes) and only screen_size / color_depth matches, or 'other'
        # matches explicitly tagged as uhdbluray neighbors.
        return (
            not matches.holes(start, end, predicate=lambda m: m.value.strip(seps)) and
            not matches.range(start, end, predicate=(
                lambda m: not m.private and (
                    m.name not in ('screen_size', 'color_depth') and (
                        m.name != 'other' or 'uhdbluray-neighbor' not in m.tags))))
        )
    def when(self, matches, context):
        """Return (to_remove, to_append) upgrading Blu-ray sources, or False."""
        to_remove = []
        to_append = []
        for filepart in matches.markers.named('path'):
            for match in matches.range(filepart.start, filepart.end, predicate=(
                    lambda m: not m.private and m.name == 'source' and m.value == 'Blu-ray')):
                # First look for an 'Ultra HD' match just before the Blu-ray match...
                other = self.find_ultrahd(matches, filepart.start, match.start, -1)
                if not other or not self.validate_range(matches, other.end, match.start):
                    # ...then just after it.
                    other = self.find_ultrahd(matches, match.end, filepart.end, 0)
                    if not other or not self.validate_range(matches, match.end, other.start):
                        # No adjacent 'Ultra HD': still upgrade when the filepart
                        # carries a 2160p screen_size, otherwise leave the match alone.
                        if not matches.range(filepart.start, filepart.end, predicate=(
                                lambda m: m.name == 'screen_size' and m.value == '2160p')):
                            continue
                if other:
                    # Hide the consumed 'Ultra HD' match from the final result.
                    other.private = True
                # Replace the Blu-ray source with a copy valued 'Ultra HD Blu-ray'.
                new_source = copy.copy(match)
                new_source.value = 'Ultra HD Blu-ray'
                to_remove.append(match)
                to_append.append(new_source)
        if to_remove or to_append:
            return to_remove, to_append
        return False
class ValidateSourcePrefixSuffix(Rule):
    """
    Validate source with source prefix, source suffix.
    """
    priority = 64
    consequence = RemoveMatch

    @staticmethod
    def _lacks_boundary(matches, source):
        """Return True when the source match has no separator and no tagged
        prefix/suffix neighbor on at least one side."""
        no_prefix = (not seps_before(source) and
                     not matches.range(source.start - 1, source.start - 2,
                                       lambda m: 'source-prefix' in m.tags))
        if no_prefix:
            return True
        no_suffix = (not seps_after(source) and
                     not matches.range(source.end, source.end + 1,
                                       lambda m: 'source-suffix' in m.tags))
        return no_suffix

    def when(self, matches, context):
        to_remove = []
        for filepart in matches.markers.named('path'):
            candidates = matches.range(filepart.start, filepart.end,
                                       predicate=lambda m: m.name == 'source')
            for candidate in candidates:
                initiator = candidate.initiator
                if self._lacks_boundary(matches, initiator):
                    # Drop the chained initiator together with all its children.
                    if initiator.children:
                        to_remove.extend(initiator.children)
                    to_remove.append(initiator)
        return to_remove
class ValidateWeakSource(Rule):
    """
    Validate weak source
    """
    dependency = [ValidateSourcePrefixSuffix]
    priority = 64
    consequence = RemoveMatch

    @staticmethod
    def _belongs_to_title(matches, filepart, source):
        """Return True when a weakly-matched source is most likely part of the
        title: another source follows it in the same filepart and unmatched
        text (a hole) precedes it."""
        if 'weak.source' not in source.tags:
            return False
        if not matches.range(source.end, filepart.end,
                             predicate=lambda m: m.name == 'source'):
            return False
        return bool(matches.holes(filepart.start, source.start,
                                  predicate=lambda m: m.value.strip(seps), index=-1))

    def when(self, matches, context):
        to_remove = []
        for filepart in matches.markers.named('path'):
            # if there are more than 1 source in this filepart, just before the year and with holes for the title
            # most likely the source is part of the title
            for source in matches.range(filepart.start, filepart.end,
                                        predicate=lambda m: m.name == 'source'):
                if self._belongs_to_title(matches, filepart, source):
                    if source.children:
                        to_remove.extend(source.children)
                    to_remove.append(source)
        return to_remove

View file

@ -8,64 +8,30 @@ import re
from rebulk import Rebulk
from rebulk.rules import Rule, RemoveMatch
from ..common.pattern import is_disabled
from ...rules.common import seps, dash
from ...rules.common.validators import seps_before, seps_after
def streaming_service():
def streaming_service(config): # pylint: disable=too-many-statements,unused-argument
"""Streaming service property.
:param config: rule configuration
:type config: dict
:return:
:rtype: Rebulk
"""
rebulk = Rebulk().string_defaults(ignore_case=True).regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
rebulk.defaults(name='streaming_service', tags=['format-prefix'])
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'streaming_service'))
rebulk = rebulk.string_defaults(ignore_case=True).regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
rebulk.defaults(name='streaming_service', tags=['source-prefix'])
rebulk.string('AE', 'A&E', value='A&E')
rebulk.string('AMBC', value='ABC')
rebulk.string('AMC', value='AMC')
rebulk.string('AMZN', 'AmazonPrime', value='Amazon Prime')
rebulk.regex('Amazon-Prime', value='Amazon Prime')
rebulk.string('AS', 'AdultSwim', value='Adult Swim')
rebulk.regex('Adult-Swim', value='Adult Swim')
rebulk.string('iP', 'BBCiPlayer', value='BBC iPlayer')
rebulk.regex('BBC-iPlayer', value='BBC iPlayer')
rebulk.string('CBS', value='CBS')
rebulk.string('CC', 'ComedyCentral', value='Comedy Central')
rebulk.regex('Comedy-Central', value='Comedy Central')
rebulk.string('CR', 'CrunchyRoll', value='Crunchy Roll')
rebulk.regex('Crunchy-Roll', value='Crunchy Roll')
rebulk.string('CW', 'TheCW', value='The CW')
rebulk.regex('The-CW', value='The CW')
rebulk.string('DISC', 'Discovery', value='Discovery')
rebulk.string('DIY', value='DIY Network')
rebulk.string('DSNY', 'Disney', value='Disney')
rebulk.string('EPIX', 'ePix', value='ePix')
rebulk.string('HBO', 'HBOGo', value='HBO Go')
rebulk.regex('HBO-Go', value='HBO Go')
rebulk.string('HIST', 'History', value='History')
rebulk.string('ID', value='Investigation Discovery')
rebulk.string('IFC', 'IFC', value='IFC')
rebulk.string('PBS', 'PBS', value='PBS')
rebulk.string('NATG', 'NationalGeographic', value='National Geographic')
rebulk.regex('National-Geographic', value='National Geographic')
rebulk.string('NBA', 'NBATV', value='NBA TV')
rebulk.regex('NBA-TV', value='NBA TV')
rebulk.string('NBC', value='NBC')
rebulk.string('NFL', value='NFL')
rebulk.string('NICK', 'Nickelodeon', value='Nickelodeon')
rebulk.string('NF', 'Netflix', value='Netflix')
rebulk.string('iTunes', value='iTunes')
rebulk.string('RTE', value='RTÉ One')
rebulk.string('SESO', 'SeeSo', value='SeeSo')
rebulk.string('SPKE', 'SpikeTV', 'Spike TV', value='Spike TV')
rebulk.string('SYFY', 'Syfy', value='Syfy')
rebulk.string('TFOU', 'TFou', value='TFou')
rebulk.string('TLC', value='TLC')
rebulk.string('TV3', value='TV3 Ireland')
rebulk.string('TV4', value='TV4 Sweeden')
rebulk.string('TVL', 'TVLand', 'TV Land', value='TV Land')
rebulk.string('UFC', value='UFC')
rebulk.string('USAN', value='USA Network')
for value, items in config.items():
patterns = items if isinstance(items, list) else [items]
for pattern in patterns:
if pattern.startswith('re:'):
rebulk.regex(pattern, value=value)
else:
rebulk.string(pattern, value=value)
rebulk.rules(ValidateStreamingService)
@ -75,11 +41,11 @@ def streaming_service():
class ValidateStreamingService(Rule):
"""Validate streaming service matches."""
priority = 32
priority = 128
consequence = RemoveMatch
def when(self, matches, context):
"""Streaming service is always before format.
"""Streaming service is always before source.
:param matches:
:type matches: rebulk.match.Matches
@ -93,16 +59,20 @@ class ValidateStreamingService(Rule):
previous_match = matches.previous(service, lambda match: 'streaming_service.prefix' in match.tags, 0)
has_other = service.initiator and service.initiator.children.named('other')
if not has_other and \
(not next_match or matches.holes(service.end, next_match.start,
predicate=lambda match: match.value.strip(seps))) and \
(not previous_match or matches.holes(previous_match.end, service.start,
predicate=lambda match: match.value.strip(seps))):
to_remove.append(service)
continue
if not has_other:
if (not next_match or
matches.holes(service.end, next_match.start,
predicate=lambda match: match.value.strip(seps)) or
not seps_before(service)):
if (not previous_match or
matches.holes(previous_match.end, service.start,
predicate=lambda match: match.value.strip(seps)) or
not seps_after(service)):
to_remove.append(service)
continue
if service.value == 'Comedy Central':
# Current match is a valid streaming service, removing invalid closed caption (CC) matches
to_remove.extend(matches.named('other', predicate=lambda match: match.value == 'CC'))
# Current match is a valid streaming service, removing invalid Criterion Collection (CC) matches
to_remove.extend(matches.named('edition', predicate=lambda match: match.value == 'Criterion'))
return to_remove

View file

@ -8,21 +8,31 @@ from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch, AppendTags
from rebulk.formatters import formatters
from .film import FilmTitleRule
from .language import SubtitlePrefixLanguageRule, SubtitleSuffixLanguageRule, SubtitleExtensionRule
from .language import (
SubtitlePrefixLanguageRule,
SubtitleSuffixLanguageRule,
SubtitleExtensionRule,
NON_SPECIFIC_LANGUAGES
)
from ..common import seps, title_seps
from ..common.comparators import marker_sorted
from ..common.expected import build_expected_function
from ..common.formatters import cleanup, reorder_title
from ..common.pattern import is_disabled
from ..common.validators import seps_surround
def title():
def title(config): # pylint:disable=unused-argument
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().rules(TitleFromPosition, PreferTitleWithYear)
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'title'))
rebulk.rules(TitleFromPosition, PreferTitleWithYear)
expected_title = build_expected_function('expected_title')
@ -83,18 +93,25 @@ class TitleBaseRule(Rule):
:rtype:
"""
cropped_holes = []
group_markers = matches.markers.named('group')
for group_marker in group_markers:
path_marker = matches.markers.at_match(group_marker, predicate=lambda m: m.name == 'path', index=0)
if path_marker and path_marker.span == group_marker.span:
group_markers.remove(group_marker)
for hole in holes:
group_markers = matches.markers.named('group')
cropped_holes.extend(hole.crop(group_markers))
return cropped_holes
def is_ignored(self, match):
@staticmethod
def is_ignored(match):
"""
Ignore matches when scanning for title (hole).
Full word language and countries won't be ignored if they are uppercase.
"""
return not (len(match) > 3 and match.raw.isupper()) and match.name in ['language', 'country', 'episode_details']
return not (len(match) > 3 and match.raw.isupper()) and match.name in ('language', 'country', 'episode_details')
def should_keep(self, match, to_keep, matches, filepart, hole, starting):
"""
@ -114,7 +131,7 @@ class TitleBaseRule(Rule):
:return:
:rtype:
"""
if match.name in ['language', 'country']:
if match.name in ('language', 'country'):
# Keep language if exactly matching the hole.
if len(hole.value) == len(match.raw):
return True
@ -125,9 +142,10 @@ class TitleBaseRule(Rule):
for outside in outside_matches:
other_languages.extend(matches.range(outside.start, outside.end,
lambda c_match: c_match.name == match.name and
c_match not in to_keep))
c_match not in to_keep and
c_match.value not in NON_SPECIFIC_LANGUAGES))
if not other_languages:
if not other_languages and (not starting or len(match.raw) <= 3):
return True
return False
@ -145,7 +163,7 @@ class TitleBaseRule(Rule):
return match.start >= hole.start and match.end <= hole.end
return True
def check_titles_in_filepart(self, filepart, matches, context):
def check_titles_in_filepart(self, filepart, matches, context): # pylint:disable=inconsistent-return-statements
"""
Find title in filepart (ignoring language)
"""
@ -154,12 +172,11 @@ class TitleBaseRule(Rule):
holes = matches.holes(start, end + 1, formatter=formatters(cleanup, reorder_title),
ignore=self.is_ignored,
predicate=lambda hole: hole.value)
predicate=lambda m: m.value)
holes = self.holes_process(holes, matches)
for hole in holes:
# pylint:disable=cell-var-from-loop
if not hole or (self.hole_filter and not self.hole_filter(hole, matches)):
continue
@ -170,8 +187,8 @@ class TitleBaseRule(Rule):
if ignored_matches:
for ignored_match in reversed(ignored_matches):
# pylint:disable=undefined-loop-variable
trailing = matches.chain_before(hole.end, seps, predicate=lambda match: match == ignored_match)
# pylint:disable=undefined-loop-variable, cell-var-from-loop
trailing = matches.chain_before(hole.end, seps, predicate=lambda m: m == ignored_match)
if trailing:
should_keep = self.should_keep(ignored_match, to_keep, matches, filepart, hole, False)
if should_keep:
@ -188,7 +205,7 @@ class TitleBaseRule(Rule):
for ignored_match in ignored_matches:
if ignored_match not in to_keep:
starting = matches.chain_after(hole.start, seps,
predicate=lambda match: match == ignored_match)
predicate=lambda m: m == ignored_match)
if starting:
should_keep = self.should_keep(ignored_match, to_keep, matches, filepart, hole, True)
if should_keep:
@ -214,7 +231,7 @@ class TitleBaseRule(Rule):
hole.tags = self.match_tags
if self.alternative_match_name:
# Split and keep values that can be a title
titles = hole.split(title_seps, lambda match: match.value)
titles = hole.split(title_seps, lambda m: m.value)
for title_match in list(titles[1:]):
previous_title = titles[titles.index(title_match) - 1]
separator = matches.input_string[previous_title.end:title_match.start]
@ -231,14 +248,15 @@ class TitleBaseRule(Rule):
return titles, to_remove
def when(self, matches, context):
ret = []
to_remove = []
if matches.named(self.match_name, lambda match: 'expected' in match.tags):
return
return False
fileparts = [filepart for filepart in list(marker_sorted(matches.markers.named('path'), matches))
if not self.filepart_filter or self.filepart_filter(filepart, matches)]
to_remove = []
# Priorize fileparts containing the year
years_fileparts = []
for filepart in fileparts:
@ -246,7 +264,6 @@ class TitleBaseRule(Rule):
if year_match:
years_fileparts.append(filepart)
ret = []
for filepart in fileparts:
try:
years_fileparts.remove(filepart)
@ -268,7 +285,9 @@ class TitleBaseRule(Rule):
ret.extend(titles)
to_remove.extend(to_remove_c)
return ret, to_remove
if ret or to_remove:
return ret, to_remove
return False
class TitleFromPosition(TitleBaseRule):
@ -282,6 +301,9 @@ class TitleFromPosition(TitleBaseRule):
def __init__(self):
super(TitleFromPosition, self).__init__('title', ['title'], 'alternative_title')
def enabled(self, context):
return not is_disabled(context, 'alternative_title')
class PreferTitleWithYear(Rule):
"""
@ -302,7 +324,7 @@ class PreferTitleWithYear(Rule):
if filepart:
year_match = matches.range(filepart.start, filepart.end, lambda match: match.name == 'year', 0)
if year_match:
group = matches.markers.at_match(year_match, lambda group: group.name == 'group')
group = matches.markers.at_match(year_match, lambda m: m.name == 'group')
if group:
with_year_in_group.append(title_match)
else:
@ -310,16 +332,18 @@ class PreferTitleWithYear(Rule):
to_tag = []
if with_year_in_group:
title_values = set([title_match.value for title_match in with_year_in_group])
title_values = {title_match.value for title_match in with_year_in_group}
to_tag.extend(with_year_in_group)
elif with_year:
title_values = set([title_match.value for title_match in with_year])
title_values = {title_match.value for title_match in with_year}
to_tag.extend(with_year)
else:
title_values = set([title_match.value for title_match in titles])
title_values = {title_match.value for title_match in titles}
to_remove = []
for title_match in titles:
if title_match.value not in title_values:
to_remove.append(title_match)
return to_remove, to_tag
if to_remove or to_tag:
return to_remove, to_tag
return False

View file

@ -6,6 +6,7 @@ type property
from rebulk import CustomRule, Rebulk, POST_PROCESS
from rebulk.match import Match
from ..common.pattern import is_disabled
from ...rules.processors import Processors
@ -19,13 +20,19 @@ def _type(matches, value):
matches.append(Match(len(matches.input_string), len(matches.input_string), name='type', value=value))
def type_():
def type_(config): # pylint:disable=unused-argument
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
return Rebulk().rules(TypeProcessor)
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'type'))
rebulk = rebulk.rules(TypeProcessor)
return rebulk
class TypeProcessor(CustomRule):
@ -45,9 +52,10 @@ class TypeProcessor(CustomRule):
episode = matches.named('episode')
season = matches.named('season')
absolute_episode = matches.named('absolute_episode')
episode_details = matches.named('episode_details')
if episode or season or episode_details:
if episode or season or episode_details or absolute_episode:
return 'episode'
film = matches.named('film')

View file

@ -3,47 +3,76 @@
"""
video_codec and video_profile property
"""
from rebulk import Rebulk, Rule, RemoveMatch
from rebulk.remodule import re
from rebulk import Rebulk, Rule, RemoveMatch
from ..common import dash
from ..common.pattern import is_disabled
from ..common.validators import seps_after, seps_before, seps_surround
def video_codec():
def video_codec(config): # pylint:disable=unused-argument
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
rebulk.defaults(name="video_codec", tags=['format-suffix', 'streaming_service.suffix'])
rebulk = Rebulk()
rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
rebulk.defaults(name="video_codec",
tags=['source-suffix', 'streaming_service.suffix'],
disabled=lambda context: is_disabled(context, 'video_codec'))
rebulk.regex(r"Rv\d{2}", value="Real")
rebulk.regex("Mpeg2", value="Mpeg2")
rebulk.regex("DVDivX", "DivX", value="DivX")
rebulk.regex("XviD", value="XviD")
rebulk.regex("[hx]-?264(?:-?AVC(HD)?)?", "MPEG-?4(?:-?AVC(HD)?)", "AVC(?:HD)?", value="h264")
rebulk.regex("[hx]-?265(?:-?HEVC)?", "HEVC", value="h265")
rebulk.regex('(?P<video_codec>hevc)(?P<video_profile>10)', value={'video_codec': 'h265', 'video_profile': '10bit'},
rebulk.regex(r'Rv\d{2}', value='RealVideo')
rebulk.regex('Mpe?g-?2', '[hx]-?262', value='MPEG-2')
rebulk.string("DVDivX", "DivX", value="DivX")
rebulk.string('XviD', value='Xvid')
rebulk.regex('VC-?1', value='VC-1')
rebulk.string('VP7', value='VP7')
rebulk.string('VP8', 'VP80', value='VP8')
rebulk.string('VP9', value='VP9')
rebulk.regex('[hx]-?263', value='H.263')
rebulk.regex('[hx]-?264', '(MPEG-?4)?AVC(?:HD)?', value='H.264')
rebulk.regex('[hx]-?265', 'HEVC', value='H.265')
rebulk.regex('(?P<video_codec>hevc)(?P<color_depth>10)', value={'video_codec': 'H.265', 'color_depth': '10-bit'},
tags=['video-codec-suffix'], children=True)
# http://blog.mediacoderhq.com/h264-profiles-and-levels/
# http://fr.wikipedia.org/wiki/H.264
rebulk.defaults(name="video_profile", validator=seps_surround)
# https://en.wikipedia.org/wiki/H.264/MPEG-4_AVC
rebulk.defaults(clear=True,
name="video_profile",
validator=seps_surround,
disabled=lambda context: is_disabled(context, 'video_profile'))
rebulk.regex('10.?bits?', 'Hi10P?', 'YUV420P10', value='10bit')
rebulk.regex('8.?bits?', value='8bit')
rebulk.string('BP', value='Baseline', tags='video_profile.rule')
rebulk.string('XP', 'EP', value='Extended', tags='video_profile.rule')
rebulk.string('MP', value='Main', tags='video_profile.rule')
rebulk.string('HP', 'HiP', value='High', tags='video_profile.rule')
rebulk.string('BP', value='BP', tags='video_profile.rule')
rebulk.string('XP', 'EP', value='XP', tags='video_profile.rule')
rebulk.string('MP', value='MP', tags='video_profile.rule')
rebulk.string('HP', 'HiP', value='HP', tags='video_profile.rule')
rebulk.regex('Hi422P', value='Hi422P', tags='video_profile.rule')
rebulk.regex('Hi444PP', value='Hi444PP', tags='video_profile.rule')
# https://en.wikipedia.org/wiki/Scalable_Video_Coding
rebulk.string('SC', 'SVC', value='Scalable Video Coding', tags='video_profile.rule')
# https://en.wikipedia.org/wiki/AVCHD
rebulk.regex('AVC(?:HD)?', value='Advanced Video Codec High Definition', tags='video_profile.rule')
# https://en.wikipedia.org/wiki/H.265/HEVC
rebulk.string('HEVC', value='High Efficiency Video Coding', tags='video_profile.rule')
rebulk.string('DXVA', value='DXVA', name='video_api')
rebulk.regex('Hi422P', value='High 4:2:2')
rebulk.regex('Hi444PP', value='High 4:4:4 Predictive')
rebulk.regex('Hi10P?', value='High 10') # no profile validation is required
rebulk.string('DXVA', value='DXVA', name='video_api',
disabled=lambda context: is_disabled(context, 'video_api'))
rebulk.defaults(clear=True,
name='color_depth',
validator=seps_surround,
disabled=lambda context: is_disabled(context, 'color_depth'))
rebulk.regex('12.?bits?', value='12-bit')
rebulk.regex('10.?bits?', 'YUV420P10', 'Hi10P?', value='10-bit')
rebulk.regex('8.?bits?', value='8-bit')
rebulk.rules(ValidateVideoCodec, VideoProfileRule)
@ -52,11 +81,14 @@ def video_codec():
class ValidateVideoCodec(Rule):
"""
Validate video_codec with format property or separated
Validate video_codec with source property or separated
"""
priority = 64
consequence = RemoveMatch
def enabled(self, context):
return not is_disabled(context, 'video_codec')
def when(self, matches, context):
ret = []
for codec in matches.named('video_codec'):
@ -77,11 +109,16 @@ class VideoProfileRule(Rule):
"""
consequence = RemoveMatch
def enabled(self, context):
return not is_disabled(context, 'video_profile')
def when(self, matches, context):
profile_list = matches.named('video_profile', lambda match: 'video_profile.rule' in match.tags)
ret = []
for profile in profile_list:
codec = matches.previous(profile, lambda match: match.name == 'video_codec')
codec = matches.at_span(profile.span, lambda match: match.name == 'video_codec', 0)
if not codec:
codec = matches.previous(profile, lambda match: match.name == 'video_codec')
if not codec:
codec = matches.next(profile, lambda match: match.name == 'video_codec')
if not codec:

View file

@ -9,28 +9,35 @@ from rebulk.remodule import re
from rebulk import Rebulk, Rule, RemoveMatch
from ..common import seps
from ..common.formatters import cleanup
from ..common.pattern import is_disabled
from ..common.validators import seps_surround
from ...reutils import build_or_pattern
def website():
def website(config):
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'website'))
rebulk = rebulk.regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
rebulk.defaults(name="website")
tlds = [l.strip().decode('utf-8')
for l in resource_stream('guessit', 'tlds-alpha-by-domain.txt').readlines()
if b'--' not in l][1:] # All registered domain extension
with resource_stream('guessit', 'tlds-alpha-by-domain.txt') as tld_file:
tlds = [
tld.strip().decode('utf-8')
for tld in tld_file.readlines()
if b'--' not in tld
][1:] # All registered domain extension
safe_tlds = ['com', 'org', 'net'] # For sure a website extension
safe_subdomains = ['www'] # For sure a website subdomain
safe_prefix = ['co', 'com', 'org', 'net'] # Those words before a tlds are sure
website_prefixes = ['from']
safe_tlds = config['safe_tlds'] # For sure a website extension
safe_subdomains = config['safe_subdomains'] # For sure a website subdomain
safe_prefix = config['safe_prefixes'] # Those words before a tlds are sure
website_prefixes = config['prefixes']
rebulk.regex(r'(?:[^a-z0-9]|^)((?:'+build_or_pattern(safe_subdomains) +
r'\.)+(?:[a-z-]+\.)+(?:'+build_or_pattern(tlds) +
@ -60,7 +67,7 @@ def website():
"""
Validator for next website matches
"""
return any(name in ['season', 'episode', 'year'] for name in match.names)
return match.named('season', 'episode', 'year')
def when(self, matches, context):
to_remove = []
@ -73,7 +80,9 @@ def website():
if not safe:
suffix = matches.next(website_match, PreferTitleOverWebsite.valid_followers, 0)
if suffix:
to_remove.append(website_match)
group = matches.markers.at_match(website_match, lambda marker: marker.name == 'group', 0)
if not group:
to_remove.append(website_match)
return to_remove
rebulk.rules(PreferTitleOverWebsite, ValidateWebsitePrefix)

View file

@ -0,0 +1,335 @@
? vorbis
: options: --exclude audio_codec
-audio_codec: Vorbis
? DTS-ES
: options: --exclude audio_profile
audio_codec: DTS
-audio_profile: Extended Surround
? DTS.ES
: options: --include audio_codec
audio_codec: DTS
-audio_profile: Extended Surround
? 5.1
? 5ch
? 6ch
: options: --exclude audio_channels
-audio_channels: '5.1'
? Movie Title-x01-Other Title.mkv
? Movie Title-x01-Other Title
? directory/Movie Title-x01-Other Title/file.mkv
: options: --exclude bonus
-bonus: 1
-bonus_title: Other Title
? Title-x02-Bonus Title.mkv
: options: --include bonus
bonus: 2
-bonus_title: Other Title
? cd 1of3
: options: --exclude cd
-cd: 1
-cd_count: 3
? This.is.Us
: options: --exclude country
title: This is Us
-country: US
? 2015.01.31
: options: --exclude date
year: 2015
-date: 2015-01-31
? Something 2 mar 2013)
: options: --exclude date
-date: 2013-03-02
? 2012 2009 S01E02 2015 # If no year is marked, the second one is guessed.
: options: --exclude year
-year: 2009
? Director's cut
: options: --exclude edition
-edition: Director's Cut
? 2x5
? 2X5
? 02x05
? 2X05
? 02x5
? S02E05
? s02e05
? s02e5
? s2e05
? s02ep05
? s2EP5
: options: --exclude season
-season: 2
-episode: 5
? 2x6
? 2X6
? 02x06
? 2X06
? 02x6
? S02E06
? s02e06
? s02e6
? s2e06
? s02ep06
? s2EP6
: options: --exclude episode
-season: 2
-episode: 6
? serie Season 2 other
: options: --exclude season
-season: 2
? Some Dummy Directory/S02 Some Series/E01-Episode title.mkv
: options: --exclude episode_title
-episode_title: Episode title
season: 2
episode: 1
? Another Dummy Directory/S02 Some Series/E01-Episode title.mkv
: options: --include season --include episode
-episode_title: Episode title
season: 2
episode: 1
# pattern contains season and episode: it won't work when enabling only one
? Some Series S03E01E02
: options: --include episode
-season: 3
-episode: [1, 2]
# pattern contains season and episode: it won't work when enabling only one
? Another Series S04E01E02
: options: --include season
-season: 4
-episode: [1, 2]
? Show.Name.Season.4.Episode.1
: options: --include episode
-season: 4
episode: 1
? Another.Show.Name.Season.4.Episode.1
: options: --include season
season: 4
-episode: 1
? Some Series S01 02 03
: options: --exclude season
-season: [1, 2, 3]
? Some Series E01 02 04
: options: --exclude episode
-episode: [1, 2, 4]
? A very special episode s06 special
: options: -t episode --exclude episode_details
season: 6
-episode_details: Special
? S01D02.3-5-GROUP
: options: --exclude disc
-season: 1
-disc: [2, 3, 4, 5]
-episode: [2, 3, 4, 5]
? S01D02&4-6&8
: options: --exclude season
-season: 1
-disc: [2, 4, 5, 6, 8]
-episode: [2, 4, 5, 6, 8]
? Film Title-f01-Series Title.mkv
: options: --exclude film
-film: 1
-film_title: Film Title
? Another Film Title-f01-Series Title.mkv
: options: --exclude film_title
film: 1
-film_title: Film Title
? English
? .ENG.
: options: --exclude language
-language: English
? SubFrench
? SubFr
? STFr
: options: --exclude subtitle_language
-language: French
-subtitle_language: French
? ST.FR
: options: --exclude subtitle_language
language: French
-subtitle_language: French
? ENG.-.sub.FR
? ENG.-.FR Sub
: options: --include language
language: [English, French]
-subtitle_language: French
? ENG.-.SubFR
: options: --include language
language: English
-subtitle_language: French
? ENG.-.FRSUB
? ENG.-.FRSUBS
? ENG.-.FR-SUBS
: options: --include subtitle_language
-language: English
subtitle_language: French
? DVD.Real.XViD
? DVD.fix.XViD
: options: --exclude other
-other: Fix
-proper_count: 1
? Part 3
? Part III
? Part Three
? Part Trois
? Part3
: options: --exclude part
-part: 3
? Some.Title.XViD-by.Artik[SEDG].avi
: options: --exclude release_group
-release_group: Artik[SEDG]
? "[ABC] Some.Title.avi"
? some/folder/[ABC]Some.Title.avi
: options: --exclude release_group
-release_group: ABC
? 360p
? 360px
? "360"
? +500x360
: options: --exclude screen_size
-screen_size: 360p
? 640x360
: options: --exclude aspect_ratio
screen_size: 360p
-aspect_ratio: 1.778
? 8196x4320
: options: --exclude screen_size
-screen_size: 4320p
-aspect_ratio: 1.897
? 4.3gb
: options: --exclude size
-size: 4.3GB
? VhS_rip
? VHS.RIP
: options: --exclude source
-source: VHS
-other: Rip
? DVD.RIP
: options: --include other
-source: DVD
-other: Rip
? Title Only.avi
: options: --exclude title
-title: Title Only
? h265
? x265
? h.265
? x.265
? hevc
: options: --exclude video_codec
-video_codec: H.265
? hevc10
: options: --include color_depth
-video_codec: H.265
-color_depth: 10-bit
? HEVC-YUV420P10
: options: --include color_depth
-video_codec: H.265
color_depth: 10-bit
? h265-HP
: options: --exclude video_profile
video_codec: H.265
-video_profile: High
? House.of.Cards.2013.S02E03.1080p.NF.WEBRip.DD5.1.x264-NTb.mkv
? House.of.Cards.2013.S02E03.1080p.Netflix.WEBRip.DD5.1.x264-NTb.mkv
: options: --exclude streaming_service
-streaming_service: Netflix
? wawa.co.uk
: options: --exclude website
-website: wawa.co.uk
? movie.mp4
: options: --exclude mimetype
-mimetype: video/mp4
? another movie.mkv
: options: --exclude container
-container: mkv
? series s02e01
: options: --exclude type
-type: episode
? series s02e01
: options: --exclude type
-type: episode
? Hotel.Hell.S01E01.720p.DD5.1.448kbps-ALANiS
: options: --exclude audio_bit_rate
-audio_bit_rate: 448Kbps
? Katy Perry - Pepsi & Billboard Summer Beats Concert Series 2012 1080i HDTV 20 Mbps DD2.0 MPEG2-TrollHD.ts
: options: --exclude video_bit_rate
-video_bit_rate: 20Mbps
? "[Figmentos] Monster 34 - At the End of Darkness [781219F1].mkv"
: options: --exclude crc32
-crc32: 781219F1
? 1080p25
: options: --exclude frame_rate
screen_size: 1080p
-frame_rate: 25fps
? 1080p25
: options: --exclude screen_size
-screen_size: 1080p
-frame_rate: 25fps
? 1080p25
: options: --include frame_rate
-screen_size: 1080p
-frame_rate: 25fps
? 1080p 30fps
: options: --exclude screen_size
-screen_size: 1080p
frame_rate: 30fps

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -8,22 +8,25 @@
? +lame3.100
: audio_codec: MP3
? +MP2
: audio_codec: MP2
? +DolbyDigital
? +DD
? +Dolby Digital
? +AC3
: audio_codec: AC3
: audio_codec: Dolby Digital
? +DDP
? +DD+
? +EAC3
: audio_codec: EAC3
: audio_codec: Dolby Digital Plus
? +DolbyAtmos
? +Dolby Atmos
? +Atmos
? -Atmosphere
: audio_codec: DolbyAtmos
: audio_codec: Dolby Atmos
? +AAC
: audio_codec: AAC
@ -36,33 +39,34 @@
? +True-HD
? +trueHD
: audio_codec: TrueHD
: audio_codec: Dolby TrueHD
? +True-HD51
? +trueHD51
: audio_codec: TrueHD
: audio_codec: Dolby TrueHD
audio_channels: '5.1'
? +DTSHD
? +DTS HD
? +DTS-HD
: audio_codec: DTS
audio_profile: HD
: audio_codec: DTS-HD
? +DTS-HDma
: audio_codec: DTS
audio_profile: HDMA
? +DTSMA
: audio_codec: DTS-HD
audio_profile: Master Audio
? +AC3-hq
: audio_codec: AC3
audio_profile: HQ
: audio_codec: Dolby Digital
audio_profile: High Quality
? +AAC-HE
: audio_codec: AAC
audio_profile: HE
audio_profile: High Efficiency
? +AAC-LC
: audio_codec: AAC
audio_profile: LC
audio_profile: Low Complexity
? +AAC2.0
? +AAC20
@ -90,8 +94,41 @@
? DD5.1
? DD51
: audio_codec: AC3
: audio_codec: Dolby Digital
audio_channels: '5.1'
? -51
: audio_channels: '5.1'
? DTS-HD.HRA
? DTSHD.HRA
? DTS-HD.HR
? DTSHD.HR
? -HRA
? -HR
: audio_codec: DTS-HD
audio_profile: High Resolution Audio
? DTSES
? DTS-ES
? -ES
: audio_codec: DTS
audio_profile: Extended Surround
? DD-EX
? DDEX
? -EX
: audio_codec: Dolby Digital
audio_profile: EX
? OPUS
: audio_codec: Opus
? Vorbis
: audio_codec: Vorbis
? PCM
: audio_codec: PCM
? LPCM
: audio_codec: LPCM

View file

@ -7,4 +7,4 @@
? Some.Title-DVDRIP-x264-CDP
: cd: !!null
release_group: CDP
video_codec: h264
video_codec: H.264

View file

@ -0,0 +1,467 @@
? is
: title: is
? it
: title: it
? am
: title: am
? mad
: title: mad
? men
: title: men
? man
: title: man
? run
: title: run
? sin
: title: sin
? st
: title: st
? to
: title: to
? 'no'
: title: 'no'
? non
: title: non
? war
: title: war
? min
: title: min
? new
: title: new
? car
: title: car
? day
: title: day
? bad
: title: bad
? bat
: title: bat
? fan
: title: fan
? fry
: title: fry
? cop
: title: cop
? zen
: title: zen
? gay
: title: gay
? fat
: title: fat
? one
: title: one
? cherokee
: title: cherokee
? got
: title: got
? an
: title: an
? as
: title: as
? cat
: title: cat
? her
: title: her
? be
: title: be
? hat
: title: hat
? sun
: title: sun
? may
: title: may
? my
: title: my
? mr
: title: mr
? rum
: title: rum
? pi
: title: pi
? bb
: title: bb
? bt
: title: bt
? tv
: title: tv
? aw
: title: aw
? by
: title: by
? md
: other: Mic Dubbed
? mp
: title: mp
? cd
: title: cd
? in
: title: in
? ad
: title: ad
? ice
: title: ice
? ay
: title: ay
? at
: title: at
? star
: title: star
? so
: title: so
? he
: title: he
? do
: title: do
? ax
: title: ax
? mx
: title: mx
? bas
: title: bas
? de
: title: de
? le
: title: le
? son
: title: son
? ne
: title: ne
? ca
: title: ca
? ce
: title: ce
? et
: title: et
? que
: title: que
? mal
: title: mal
? est
: title: est
? vol
: title: vol
? or
: title: or
? mon
: title: mon
? se
: title: se
? je
: title: je
? tu
: title: tu
? me
: title: me
? ma
: title: ma
? va
: title: va
? au
: country: AU
? lu
: title: lu
? wa
: title: wa
? ga
: title: ga
? ao
: title: ao
? la
: title: la
? el
: title: el
? del
: title: del
? por
: title: por
? mar
: title: mar
? al
: title: al
? un
: title: un
? ind
: title: ind
? arw
: title: arw
? ts
: source: Telesync
? ii
: title: ii
? bin
: title: bin
? chan
: title: chan
? ss
: title: ss
? san
: title: san
? oss
: title: oss
? iii
: title: iii
? vi
: title: vi
? ben
: title: ben
? da
: title: da
? lt
: title: lt
? ch
: title: ch
? sr
: title: sr
? ps
: title: ps
? cx
: title: cx
? vo
: title: vo
? mkv
: container: mkv
? avi
: container: avi
? dmd
: title: dmd
? the
: title: the
? dis
: title: dis
? cut
: title: cut
? stv
: title: stv
? des
: title: des
? dia
: title: dia
? and
: title: and
? cab
: title: cab
? sub
: title: sub
? mia
: title: mia
? rim
: title: rim
? las
: title: las
? une
: title: une
? par
: title: par
? srt
: container: srt
? ano
: title: ano
? toy
: title: toy
? job
: title: job
? gag
: title: gag
? reel
: title: reel
? www
: title: www
? for
: title: for
? ayu
: title: ayu
? csi
: title: csi
? ren
: title: ren
? moi
: title: moi
? sur
: title: sur
? fer
: title: fer
? fun
: title: fun
? two
: title: two
? big
: title: big
? psy
: title: psy
? air
: title: air
? brazil
: title: brazil
? jordan
: title: jordan
? bs
: title: bs
? kz
: title: kz
? gt
: title: gt
? im
: title: im
? pt
: language: pt
? scr
: title: scr
? sd
: title: sd
? hr
: other: High Resolution

View file

@ -5,6 +5,9 @@
: country: US
title: this is title
? This.is.us.title
: title: This is us title
? This.is.Us
: title: This is Us
? This.Is.Us
: options: --no-default-config
title: This Is Us

View file

@ -7,25 +7,57 @@
? Collector
? Collector Edition
? Edition Collector
: edition: Collector Edition
: edition: Collector
? Special Edition
? Edition Special
? -Special
: edition: Special Edition
: edition: Special
? Criterion Edition
? Edition Criterion
? CC
? -Criterion
: edition: Criterion Edition
: edition: Criterion
? Deluxe
? Deluxe Edition
? Edition Deluxe
: edition: Deluxe Edition
: edition: Deluxe
? Super Movie Alternate XViD
? Super Movie Alternative XViD
? Super Movie Alternate Cut XViD
? Super Movie Alternative Cut XViD
: edition: Alternative Cut
? ddc
: edition: Director's Definitive Cut
? IMAX
? IMAX Edition
: edition: IMAX
? ultimate edition
? -ultimate
: edition: Ultimate
? ultimate collector edition
? ultimate collector's edition
? ultimate collectors edition
? -collectors edition
? -ultimate edition
: edition: [Ultimate, Collector]
? ultimate collectors edition dc
: edition: [Ultimate, Collector, Director's Cut]
? fan edit
? fan edition
? fan collection
: edition: Fan
? ultimate fan edit
? ultimate fan edition
? ultimate fan collection
: edition: [Ultimate, Fan]

View file

@ -32,8 +32,6 @@
? +serie Season 2 other
? +serie Saisons 2 other
? +serie Seasons 2 other
? +serie Serie 2 other
? +serie Series 2 other
? +serie Season Two other
? +serie Season II other
: season: 2
@ -156,7 +154,7 @@
? Show.Name.Season.1.3&5.HDTV.XviD-GoodGroup[SomeTrash]
? Show.Name.Season.1.3 and 5.HDTV.XviD-GoodGroup[SomeTrash]
: format: HDTV
: source: HDTV
release_group: GoodGroup[SomeTrash]
season:
- 1
@ -164,12 +162,12 @@
- 5
title: Show Name
type: episode
video_codec: XviD
video_codec: Xvid
? Show.Name.Season.1.2.3-5.HDTV.XviD-GoodGroup[SomeTrash]
? Show.Name.Season.1.2.3~5.HDTV.XviD-GoodGroup[SomeTrash]
? Show.Name.Season.1.2.3 to 5.HDTV.XviD-GoodGroup[SomeTrash]
: format: HDTV
: source: HDTV
release_group: GoodGroup[SomeTrash]
season:
- 1
@ -179,18 +177,19 @@
- 5
title: Show Name
type: episode
video_codec: XviD
video_codec: Xvid
? The.Get.Down.S01EP01.FRENCH.720p.WEBRIP.XVID-STR
: episode: 1
format: WEBRip
source: Web
other: Rip
language: fr
release_group: STR
screen_size: 720p
season: 1
title: The Get Down
type: episode
video_codec: XviD
video_codec: Xvid
? My.Name.Is.Earl.S01E01-S01E21.SWE-SUB
: episode:
@ -269,4 +268,64 @@
? Episode71
? Episode 71
: episode: 71
: episode: 71
? S01D02.3-5-GROUP
: disc: [2, 3, 4, 5]
? S01D02&4-6&8
: disc: [2, 4, 5, 6, 8]
? Something.4x05-06
? Something - 4x05-06
? Something:4x05-06
? Something 4x05-06
? Something-4x05-06
: title: Something
season: 4
episode:
- 5
- 6
? Something.4x05-06
? Something - 4x05-06
? Something:4x05-06
? Something 4x05-06
? Something-4x05-06
: options: -T something
title: something
season: 4
episode:
- 5
- 6
? Colony 23/S01E01.Some.title.mkv
: title: Colony 23
season: 1
episode: 1
episode_title: Some title
? Show.Name.E02.2010.mkv
: options: -t episode
title: Show Name
year: 2010
episode: 2
? Show.Name.E02.S2010.mkv
: options: -t episode
title: Show Name
year: 2010
season: 2010
episode: 2
? Show.Name.E02.2010.mkv
: title: Show Name
year: 2010
episode: 2
? Show.Name.E02.S2010.mkv
: title: Show Name
year: 2010
season: 2010
episode: 2

View file

@ -1,138 +0,0 @@
# Multiple input strings having same expected results can be chained.
# Use - marker to check inputs that should not match results.
? +VHS
? +VHSRip
? +VHS-Rip
? +VhS_rip
? +VHS.RIP
? -VHSAnythingElse
? -SomeVHS stuff
? -VH
? -VHx
? -VHxRip
: format: VHS
? +Cam
? +CamRip
? +CaM Rip
? +Cam_Rip
? +cam.rip
: format: Cam
? +Telesync
? +TS
? +HD TS
? -Hd.Ts # ts file extension
? -HD.TS # ts file extension
? +Hd-Ts
: format: Telesync
? +Workprint
? +workPrint
? +WorkPrint
? +WP
? -Work Print
: format: Workprint
? +Telecine
? +teleCine
? +TC
? -Tele Cine
: format: Telecine
? +PPV
? +ppv-rip
: format: PPV
? -TV
? +SDTV
? +SDTVRIP
? +Rip sd tv
? +TvRip
? +Rip TV
: format: TV
? +DVB
? +DVB-Rip
? +DvBRiP
? +pdTV
? +Pd Tv
: format: DVB
? +DVD
? +DVD-RIP
? +video ts
? +DVDR
? +DVD 9
? +dvd 5
? -dvd ts
: format: DVD
-format: ts
? +HDTV
? +tv rip hd
? +HDtv Rip
? +HdRip
: format: HDTV
? +VOD
? +VodRip
? +vod rip
: format: VOD
? +webrip
? +Web Rip
? +webdlrip
? +web dl rip
? +webcap
? +web cap
: format: WEBRip
? +webdl
? +Web DL
? +webHD
? +WEB hd
? +web
: format: WEB-DL
? +HDDVD
? +hd dvd
? +hdDvdRip
: format: HD-DVD
? +BluRay
? +BluRay rip
? +BD
? +BR
? +BDRip
? +BR rip
? +BD5
? +BD9
? +BD25
? +bd50
: format: BluRay
? XVID.NTSC.DVDR.nfo
: format: DVD
? AHDTV
: format: AHDTV
? dsr
? dsrip
? ds rip
? dsrrip
? dsr rip
? satrip
? sat rip
? dth
? dthrip
? dth rip
: format: SATRip
? HDTC
: format: HDTC
? UHDTV
? UHDRip
: format: UHDTV

View file

@ -36,4 +36,12 @@
? +ENG.-.SubSV
? +ENG.-.SVSUB
: language: English
subtitle_language: Swedish
subtitle_language: Swedish
? The English Patient (1996)
: title: The English Patient
-language: english
? French.Kiss.1995.1080p
: title: French Kiss
-language: french

View file

@ -12,38 +12,35 @@
? +AudioFixed
? +Audio Fix
? +Audio Fixed
: other: AudioFix
: other: Audio Fixed
? +SyncFix
? +SyncFixed
? +Sync Fix
? +Sync Fixed
: other: SyncFix
: other: Sync Fixed
? +DualAudio
? +Dual Audio
: other: DualAudio
: other: Dual Audio
? +ws
? +WideScreen
? +Wide Screen
: other: WideScreen
: other: Widescreen
# Fix and Real must be surround by others properties to be matched.
? DVD.Real.XViD
# Fix must be surround by others properties to be matched.
? DVD.fix.XViD
? -DVD.Real
? -DVD.Fix
? -Real.XViD
? -Fix.XViD
: other: Proper
proper_count: 1
: other: Fix
-proper_count: 1
? -DVD.BlablaBla.Fix.Blablabla.XVID
? -DVD.BlablaBla.Fix.XVID
? -DVD.Fix.Blablabla.XVID
: other: Proper
proper_count: 1
: other: Fix
-proper_count: 1
? DVD.Real.PROPER.REPACK
@ -51,25 +48,27 @@
proper_count: 3
? Proper
? Proper.720p
? +Repack
? +Rerip
: other: Proper
proper_count: 1
? XViD.Fansub
: other: Fansub
: other: Fan Subtitled
? XViD.Fastsub
: other: Fastsub
: other: Fast Subtitled
? +Season Complete
? -Complete
: other: Complete
? R5
: other: Region 5
? RC
: other: R5
: other: Region C
? PreAir
? Pre Air
@ -81,7 +80,7 @@
? Remux
: other: Remux
? 3D
? 3D.2019
: other: 3D
? HD
@ -90,28 +89,23 @@
? FHD
? FullHD
? Full HD
: other: FullHD
: other: Full HD
? UHD
? Ultra
? UltraHD
? Ultra HD
: other: UltraHD
: other: Ultra HD
? mHD # ??
: other: mHD
? HDLight
: other: HDLight
: other: Micro HD
? HQ
: other: HQ
? ddc
: other: DDC
: other: High Quality
? hr
: other: HR
: other: High Resolution
? PAL
: other: PAL
@ -122,15 +116,14 @@
? NTSC
: other: NTSC
? CC
: other: CC
? LDTV
: other: Low Definition
? LD
? LDTV
: other: LD
: other: Line Dubbed
? MD
: other: MD
: other: Mic Dubbed
? -The complete movie
: other: Complete
@ -139,16 +132,38 @@
: title: The complete movie
? +AC3-HQ
: audio_profile: HQ
: audio_profile: High Quality
? Other-HQ
: other: HQ
: other: High Quality
? reenc
? re-enc
? re-encoded
? reencoded
: other: ReEncoded
: other: Reencoded
? CONVERT XViD
: other: Converted
: other: Converted
? +HDRIP # it's a Rip from non specified HD source
: other: [HD, Rip]
? SDR
: other: Standard Dynamic Range
? HDR
? HDR10
? -HDR100
: other: HDR10
? BT2020
? BT.2020
? -BT.20200
? -BT.2021
: other: BT.2020
? Upscaled
? Upscale
: other: Upscaled

View file

@ -42,30 +42,30 @@
? Show.Name.x264-byEMP
: title: Show Name
video_codec: h264
video_codec: H.264
release_group: byEMP
? Show.Name.x264-NovaRip
: title: Show Name
video_codec: h264
video_codec: H.264
release_group: NovaRip
? Show.Name.x264-PARTiCLE
: title: Show Name
video_codec: h264
video_codec: H.264
release_group: PARTiCLE
? Show.Name.x264-POURMOi
: title: Show Name
video_codec: h264
video_codec: H.264
release_group: POURMOi
? Show.Name.x264-RipPourBox
: title: Show Name
video_codec: h264
video_codec: H.264
release_group: RipPourBox
? Show.Name.x264-RiPRG
: title: Show Name
video_codec: h264
video_codec: H.264
release_group: RiPRG

View file

@ -2,68 +2,279 @@
# Use - marker to check inputs that should not match results.
? +360p
? +360px
? +360i
? "+360"
? -360
? +500x360
? -250x360
: screen_size: 360p
? +640x360
? -640x360i
? -684x360i
: screen_size: 360p
aspect_ratio: 1.778
? +360i
: screen_size: 360i
? +480x360i
? -480x360p
? -450x360
: screen_size: 360i
aspect_ratio: 1.333
? +368p
? +368px
? +368i
? "+368"
? -368i
? -368
? +500x368
: screen_size: 368p
? -490x368
? -700x368
: screen_size: 368p
? +492x368p
: screen_size:
aspect_ratio: 1.337
? +654x368
: screen_size: 368p
aspect_ratio: 1.777
? +698x368
: screen_size: 368p
aspect_ratio: 1.897
? +368i
: -screen_size: 368i
? +480p
? +480px
? +480i
? "+480"
? +500x480
? -480i
? -480
? -500x480
? -638x480
? -920x480
: screen_size: 480p
? +640x480
: screen_size: 480p
aspect_ratio: 1.333
? +852x480
: screen_size: 480p
aspect_ratio: 1.775
? +910x480
: screen_size: 480p
aspect_ratio: 1.896
? +500x480
? +500 x 480
? +500 * 480
? +500x480p
? +500X480i
: screen_size: 500x480
aspect_ratio: 1.042
? +480i
? +852x480i
: screen_size: 480i
? +576p
? +576px
? +576i
? "+576"
? +500x576
? -576i
? -576
? -500x576
? -766x576
? -1094x576
: screen_size: 576p
? +768x576
: screen_size: 576p
aspect_ratio: 1.333
? +1024x576
: screen_size: 576p
aspect_ratio: 1.778
? +1092x576
: screen_size: 576p
aspect_ratio: 1.896
? +500x576
: screen_size: 500x576
aspect_ratio: 0.868
? +576i
: screen_size: 576i
? +720p
? +720px
? -720i
? 720hd
? 720pHD
? +720i
? "+720"
? +500x720
? -720
? -500x720
? -950x720
? -1368x720
: screen_size: 720p
? +960x720
: screen_size: 720p
aspect_ratio: 1.333
? +1280x720
: screen_size: 720p
aspect_ratio: 1.778
? +1366x720
: screen_size: 720p
aspect_ratio: 1.897
? +500x720
: screen_size: 500x720
aspect_ratio: 0.694
? +900p
? +900px
? +900i
? "+900"
? +500x900
? -900i
? -900
? -500x900
? -1198x900
? -1710x900
: screen_size: 900p
? +1200x900
: screen_size: 900p
aspect_ratio: 1.333
? +1600x900
: screen_size: 900p
aspect_ratio: 1.778
? +1708x900
: screen_size: 900p
aspect_ratio: 1.898
? +500x900
? +500x900p
? +500x900i
: screen_size: 500x900
aspect_ratio: 0.556
? +900i
: screen_size: 900i
? +1080p
? +1080px
? +1080hd
? +1080pHD
? -1080i
? "+1080"
? +500x1080
? -1080
? -500x1080
? -1438x1080
? -2050x1080
: screen_size: 1080p
? +1440x1080
: screen_size: 1080p
aspect_ratio: 1.333
? +1920x1080
: screen_size: 1080p
aspect_ratio: 1.778
? +2048x1080
: screen_size: 1080p
aspect_ratio: 1.896
? +1080i
? -1080p
: screen_size: 1080i
? 1440p
: screen_size: 1440p
? +500x1080
: screen_size: 500x1080
aspect_ratio: 0.463
? +2160p
? +2160px
? +2160i
? "+2160"
? -2160i
? -2160
? +4096x2160
: screen_size: 4K
? +4k
? -2878x2160
? -4100x2160
: screen_size: 2160p
? +2880x2160
: screen_size: 2160p
aspect_ratio: 1.333
? +3840x2160
: screen_size: 2160p
aspect_ratio: 1.778
? +4098x2160
: screen_size: 2160p
aspect_ratio: 1.897
? +500x2160
: screen_size: 500x2160
aspect_ratio: 0.231
? +4320p
? +4320px
? -4320i
? -4320
? -5758x2160
? -8198x2160
: screen_size: 4320p
? +5760x4320
: screen_size: 4320p
aspect_ratio: 1.333
? +7680x4320
: screen_size: 4320p
aspect_ratio: 1.778
? +8196x4320
: screen_size: 4320p
aspect_ratio: 1.897
? +500x4320
: screen_size: 500x4320
aspect_ratio: 0.116
? Test.File.720hd.bluray
? Test.File.720p24
? Test.File.720p30
? Test.File.720p50
? Test.File.720p60
? Test.File.720p120
: screen_size: 720p
? Test.File.400p
: options:
advanced_config:
screen_size:
progressive: ["400"]
screen_size: 400p
? Test.File2.400p
: options:
advanced_config:
screen_size:
progressive: ["400"]
screen_size: 400p
? Test.File.720p
: options:
advanced_config:
screen_size:
progressive: ["400"]
screen_size: 720p

View file

@ -0,0 +1,323 @@
# Multiple input strings having same expected results can be chained.
# Use - marker to check inputs that should not match results.
? +VHS
? -VHSAnythingElse
? -SomeVHS stuff
? -VH
? -VHx
: source: VHS
-other: Rip
? +VHSRip
? +VHS-Rip
? +VhS_rip
? +VHS.RIP
? -VHS
? -VHxRip
: source: VHS
other: Rip
? +Cam
: source: Camera
-other: Rip
? +CamRip
? +CaM Rip
? +Cam_Rip
? +cam.rip
? -Cam
: source: Camera
other: Rip
? +HDCam
? +HD-Cam
: source: HD Camera
-other: Rip
? +HDCamRip
? +HD-Cam.rip
? -HDCam
? -HD-Cam
: source: HD Camera
other: Rip
? +Telesync
? +TS
: source: Telesync
-other: Rip
? +TelesyncRip
? +TSRip
? -Telesync
? -TS
: source: Telesync
other: Rip
? +HD TS
? -Hd.Ts # ts file extension
? -HD.TS # ts file extension
? +Hd-Ts
: source: HD Telesync
-other: Rip
? +HD TS Rip
? +Hd-Ts-Rip
? -HD TS
? -Hd-Ts
: source: HD Telesync
other: Rip
? +Workprint
? +workPrint
? +WorkPrint
? +WP
? -Work Print
: source: Workprint
-other: Rip
? +Telecine
? +teleCine
? +TC
? -Tele Cine
: source: Telecine
-other: Rip
? +Telecine Rip
? +teleCine-Rip
? +TC-Rip
? -Telecine
? -TC
: source: Telecine
other: Rip
? +HD-TELECINE
? +HDTC
: source: HD Telecine
-other: Rip
? +HD-TCRip
? +HD TELECINE RIP
? -HD-TELECINE
? -HDTC
: source: HD Telecine
other: Rip
? +PPV
: source: Pay-per-view
-other: Rip
? +ppv-rip
? -PPV
: source: Pay-per-view
other: Rip
? -TV
? +SDTV
? +TV-Dub
: source: TV
-other: Rip
? +SDTVRIP
? +Rip sd tv
? +TvRip
? +Rip TV
? -TV
? -SDTV
: source: TV
other: Rip
? +DVB
? +pdTV
? +Pd Tv
: source: Digital TV
-other: Rip
? +DVB-Rip
? +DvBRiP
? +pdtvRiP
? +pd tv RiP
? -DVB
? -pdTV
? -Pd Tv
: source: Digital TV
other: Rip
? +DVD
? +video ts
? +DVDR
? +DVD 9
? +dvd 5
? -dvd ts
: source: DVD
-source: Telesync
-other: Rip
? +DVD-RIP
? -video ts
? -DVD
? -DVDR
? -DVD 9
? -dvd 5
: source: DVD
other: Rip
? +HDTV
: source: HDTV
-other: Rip
? +tv rip hd
? +HDtv Rip
? -HdRip # it's a Rip from non specified HD source
? -HDTV
: source: HDTV
other: Rip
? +VOD
: source: Video on Demand
-other: Rip
? +VodRip
? +vod rip
? -VOD
: source: Video on Demand
other: Rip
? +webrip
? +Web Rip
? +webdlrip
? +web dl rip
? +webcap
? +web cap
? +webcaprip
? +web cap rip
: source: Web
other: Rip
? +webdl
? +Web DL
? +webHD
? +WEB hd
? +web
: source: Web
-other: Rip
? +HDDVD
? +hd dvd
: source: HD-DVD
-other: Rip
? +hdDvdRip
? -HDDVD
? -hd dvd
: source: HD-DVD
other: Rip
? +BluRay
? +BD
? +BD5
? +BD9
? +BD25
? +bd50
: source: Blu-ray
-other: Rip
? +BR-Scr
? +BR.Screener
: source: Blu-ray
other: [Reencoded, Screener]
-language: pt-BR
? +BR-Rip
? +BRRip
: source: Blu-ray
other: [Reencoded, Rip]
-language: pt-BR
? +BluRay rip
? +BDRip
? -BluRay
? -BD
? -BR
? -BR rip
? -BD5
? -BD9
? -BD25
? -bd50
: source: Blu-ray
other: Rip
? XVID.NTSC.DVDR.nfo
: source: DVD
-other: Rip
? +AHDTV
: source: Analog HDTV
-other: Rip
? +dsr
? +dth
: source: Satellite
-other: Rip
? +dsrip
? +ds rip
? +dsrrip
? +dsr rip
? +satrip
? +sat rip
? +dthrip
? +dth rip
? -dsr
? -dth
: source: Satellite
other: Rip
? +UHDTV
: source: Ultra HDTV
-other: Rip
? +UHDRip
? +UHDTV Rip
? -UHDTV
: source: Ultra HDTV
other: Rip
? UHD Bluray
? UHD 2160p Bluray
? UHD 8bit Bluray
? UHD HQ 8bit Bluray
? Ultra Bluray
? Ultra HD Bluray
? Bluray ULTRA
? Bluray Ultra HD
? Bluray UHD
? 4K Bluray
? 2160p Bluray
? UHD 10bit HDR Bluray
? UHD HDR10 Bluray
? -HD Bluray
? -AMERICAN ULTRA (2015) 1080p Bluray
? -American.Ultra.2015.BRRip
? -BRRip XviD AC3-ULTRAS
? -UHD Proper Bluray
: source: Ultra HD Blu-ray
? UHD.BRRip
? UHD.2160p.BRRip
? BRRip.2160p.UHD
? BRRip.[4K-2160p-UHD]
: source: Ultra HD Blu-ray
other: [Reencoded, Rip]
? UHD.2160p.BDRip
? BDRip.[4K-2160p-UHD]
: source: Ultra HD Blu-ray
other: Rip
? DM
: source: Digital Master
? DMRIP
? DM-RIP
: source: Digital Master
other: Rip

View file

@ -30,3 +30,14 @@
? Some.Other title/Some other title.mkv
: title: Some Other title
? This T.I.T.L.E. has dots
? This.T.I.T.L.E..has.dots
: title: This T.I.T.L.E has dots
? This.T.I.T.L.E..has.dots.S01E02.This E.P.T.I.T.L.E.has.dots
: title: This T.I.T.L.E has dots
season: 1
episode: 2
episode_title: This E.P.T.I.T.L.E has dots
type: episode

View file

@ -6,15 +6,19 @@
? Rv30
? rv40
? -xrv40
: video_codec: Real
: video_codec: RealVideo
? mpeg2
? MPEG2
? MPEG-2
? mpg2
? H262
? H.262
? x262
? -mpeg
? -mpeg 2 # Not sure if we should ignore this one ...
? -xmpeg2
? -mpeg2x
: video_codec: Mpeg2
: video_codec: MPEG-2
? DivX
? -div X
@ -26,19 +30,25 @@
? XviD
? xvid
? -x vid
: video_codec: XviD
: video_codec: Xvid
? h263
? x263
? h.263
: video_codec: H.263
? h264
? x264
? h.264
? x.264
? mpeg4-AVC
? AVC
? AVCHD
? -MPEG-4
? -mpeg4
? -mpeg
? -h 265
? -x265
: video_codec: h264
: video_codec: H.264
? h265
? x265
@ -47,13 +57,42 @@
? hevc
? -h 264
? -x264
: video_codec: h265
: video_codec: H.265
? hevc10
? HEVC-YUV420P10
: video_codec: h265
video_profile: 10bit
: video_codec: H.265
color_depth: 10-bit
? h265-HP
: video_codec: h265
video_profile: HP
: video_codec: H.265
video_profile: High
? H.264-SC
: video_codec: H.264
video_profile: Scalable Video Coding
? mpeg4-AVC
: video_codec: H.264
video_profile: Advanced Video Codec High Definition
? AVCHD-SC
? H.264-AVCHD-SC
: video_codec: H.264
video_profile:
- Scalable Video Coding
- Advanced Video Codec High Definition
? VC1
? VC-1
: video_codec: VC-1
? VP7
: video_codec: VP7
? VP8
? VP80
: video_codec: VP8
? VP9
: video_codec: VP9

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,21 @@
{
"titles": [
"13 Reasons Why",
"Star Wars: Episode VII - The Force Awakens",
"3%",
"The 100",
"3 Percent",
"This is Us",
"Open Season 2",
"Game of Thrones",
"The X-Files",
"11.22.63"
],
"suggested": [
"13 Reasons Why",
"Star Wars: Episode VII - The Force Awakens",
"The 100",
"Open Season 2",
"11.22.63"
]
}

View file

@ -1,13 +1,14 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name, pointless-string-statement
import json
import os
import sys
import pytest
import six
from ..api import guessit, properties, GuessitException
from ..api import guessit, properties, suggested_expected, GuessitException
__location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
@ -27,6 +28,18 @@ def test_forced_binary():
assert ret and 'title' in ret and isinstance(ret['title'], six.binary_type)
@pytest.mark.skipif(sys.version_info < (3, 4), reason="Path is not available")
def test_pathlike_object():
try:
from pathlib import Path
path = Path('Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv')
ret = guessit(path)
assert ret and 'title' in ret
except ImportError: # pragma: no-cover
pass
def test_unicode_japanese():
ret = guessit('[阿维达].Avida.2006.FRENCH.DVDRiP.XViD-PROD.avi')
assert ret and 'title' in ret
@ -61,3 +74,10 @@ def test_exception():
assert "An internal error has occured in guessit" in str(excinfo.value)
assert "Guessit Exception Report" in str(excinfo.value)
assert "Please report at https://github.com/guessit-io/guessit/issues" in str(excinfo.value)
def test_suggested_expected():
with open(os.path.join(__location__, 'suggested.json'), 'r') as f:
content = json.load(f)
actual = suggested_expected(content['titles'])
assert actual == content['suggested']

View file

@ -53,6 +53,14 @@ if six.PY2:
"""
def test_ensure_standard_string_class():
class CustomStr(str):
pass
ret = guessit(CustomStr('1080p'), options={'advanced': True})
assert ret and 'screen_size' in ret and not isinstance(ret['screen_size'].input_string, CustomStr)
def test_properties():
props = properties()
assert 'video_codec' in props.keys()

View file

@ -5,7 +5,7 @@ import os
import pytest
from ..options import get_config_file_locations, merge_configurations, load_config_file, ConfigurationException, \
from ..options import get_options_file_locations, merge_options, load_config_file, ConfigurationException, \
load_config
__location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
@ -15,7 +15,7 @@ def test_config_locations():
homedir = '/root'
cwd = '/root/cwd'
locations = get_config_file_locations(homedir, cwd, True)
locations = get_options_file_locations(homedir, cwd, True)
assert len(locations) == 9
assert '/root/.guessit/options.json' in locations
@ -34,12 +34,12 @@ def test_merge_configurations():
c2 = {'param1': False, 'param2': True, 'param3': False}
c3 = {'param1': False, 'param2': True, 'param3': False}
merged = merge_configurations(c1, c2, c3)
merged = merge_options(c1, c2, c3)
assert not merged['param1']
assert merged['param2']
assert not merged['param3']
merged = merge_configurations(c3, c2, c1)
merged = merge_options(c3, c2, c1)
assert merged['param1']
assert merged['param2']
assert not merged['param3']
@ -50,28 +50,49 @@ def test_merge_configurations_lists():
c2 = {'param1': [2], 'param2': True, 'param3': False}
c3 = {'param1': [3], 'param2': True, 'param3': False}
merged = merge_configurations(c1, c2, c3)
merged = merge_options(c1, c2, c3)
assert merged['param1'] == [1, 2, 3]
assert merged['param2']
assert not merged['param3']
merged = merge_configurations(c3, c2, c1)
merged = merge_options(c3, c2, c1)
assert merged['param1'] == [3, 2, 1]
assert merged['param2']
assert not merged['param3']
def test_merge_configurations_deep():
c1 = {'param1': [1], 'param2': {'d1': [1]}, 'param3': False}
c2 = {'param1': [2], 'param2': {'d1': [2]}, 'param3': False}
c3 = {'param1': [3], 'param2': {'d3': [3]}, 'param3': False}
merged = merge_options(c1, c2, c3)
assert merged['param1'] == [1, 2, 3]
assert merged['param2']['d1'] == [1, 2]
assert merged['param2']['d3'] == [3]
assert 'd2' not in merged['param2']
assert not merged['param3']
merged = merge_options(c3, c2, c1)
assert merged['param1'] == [3, 2, 1]
assert merged['param2']
assert merged['param2']['d1'] == [2, 1]
assert 'd2' not in merged['param2']
assert merged['param2']['d3'] == [3]
assert not merged['param3']
def test_merge_configurations_pristine_all():
c1 = {'param1': [1], 'param2': True, 'param3': False}
c2 = {'param1': [2], 'param2': True, 'param3': False, 'pristine': True}
c3 = {'param1': [3], 'param2': True, 'param3': False}
merged = merge_configurations(c1, c2, c3)
merged = merge_options(c1, c2, c3)
assert merged['param1'] == [2, 3]
assert merged['param2']
assert not merged['param3']
merged = merge_configurations(c3, c2, c1)
merged = merge_options(c3, c2, c1)
assert merged['param1'] == [2, 1]
assert merged['param2']
assert not merged['param3']
@ -82,7 +103,18 @@ def test_merge_configurations_pristine_properties():
c2 = {'param1': [2], 'param2': True, 'param3': False, 'pristine': ['param2', 'param3']}
c3 = {'param1': [3], 'param2': True, 'param3': False}
merged = merge_configurations(c1, c2, c3)
merged = merge_options(c1, c2, c3)
assert merged['param1'] == [1, 2, 3]
assert merged['param2']
assert not merged['param3']
def test_merge_configurations_pristine_properties_deep():
c1 = {'param1': [1], 'param2': {'d1': False}, 'param3': True}
c2 = {'param1': [2], 'param2': {'d1': True}, 'param3': False, 'pristine': ['param2', 'param3']}
c3 = {'param1': [3], 'param2': {'d1': True}, 'param3': False}
merged = merge_options(c1, c2, c3)
assert merged['param1'] == [1, 2, 3]
assert merged['param2']
assert not merged['param3']
@ -93,7 +125,7 @@ def test_merge_configurations_pristine_properties2():
c2 = {'param1': [2], 'param2': True, 'param3': False, 'pristine': ['param1', 'param2', 'param3']}
c3 = {'param1': [3], 'param2': True, 'param3': False}
merged = merge_configurations(c1, c2, c3)
merged = merge_options(c1, c2, c3)
assert merged['param1'] == [2, 3]
assert merged['param2']
assert not merged['param3']
@ -119,24 +151,25 @@ def test_load_config_file():
def test_load_config():
config = load_config({'no_embedded_config': True, 'param1': 'test',
config = load_config({'no_default_config': True, 'param1': 'test',
'config': [os.path.join(__location__, 'config', 'test.yml')]})
assert config['param1'] == 'test'
assert not config.get('param1')
assert config.get('advanced_config') # advanced_config is still loaded from default
assert config['expected_title'] == ['The 100', 'OSS 117']
assert config['yaml'] is True
config = load_config({'no_embedded_config': True, 'param1': 'test'})
config = load_config({'no_default_config': True, 'param1': 'test'})
assert config['param1'] == 'test'
assert not config.get('param1')
assert 'expected_title' not in config
assert 'yaml' not in config
config = load_config({'no_embedded_config': True, 'param1': 'test', 'config': ['false']})
config = load_config({'no_default_config': True, 'param1': 'test', 'config': ['false']})
assert config['param1'] == 'test'
assert not config.get('param1')
assert 'expected_title' not in config
assert 'yaml' not in config

View file

@ -2,36 +2,24 @@
# -*- coding: utf-8 -*-
# pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name
import logging
import os
# io.open supports encoding= in python 2.7
from io import open # pylint: disable=redefined-builtin
import os
import yaml
import six
import babelfish
import pytest
import six # pylint:disable=wrong-import-order
import yaml # pylint:disable=wrong-import-order
from rebulk.remodule import re
from rebulk.utils import is_iterable
from ..options import parse_options, load_config
from ..yamlutils import OrderedDictYAMLLoader
from .. import guessit
from ..options import parse_options
from ..yamlutils import OrderedDictYAMLLoader
logger = logging.getLogger(__name__)
__location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
filename_predicate = None
string_predicate = None
# filename_predicate = lambda filename: 'episode_title' in filename
# string_predicate = lambda string: '-DVD.BlablaBla.Fix.Blablabla.XVID' in string
class EntryResult(object):
def __init__(self, string, negates=False):
@ -64,10 +52,10 @@ class EntryResult(object):
def __repr__(self):
if self.ok:
return self.string + ': OK!'
elif self.warning:
if self.warning:
return '%s%s: WARNING! (valid=%i, extra=%i)' % ('-' if self.negates else '', self.string, len(self.valid),
len(self.extra))
elif self.error:
if self.error:
return '%s%s: ERROR! (valid=%i, missing=%i, different=%i, extra=%i, others=%i)' % \
('-' if self.negates else '', self.string, len(self.valid), len(self.missing), len(self.different),
len(self.extra), len(self.others))
@ -136,9 +124,51 @@ class TestYml(object):
Use $ marker to check inputs that should not match results.
"""
options_re = re.compile(r'^([ \+-]+)(.*)')
options_re = re.compile(r'^([ +-]+)(.*)')
files, ids = files_and_ids(filename_predicate)
def _get_unique_id(self, collection, base_id):
ret = base_id
i = 2
while ret in collection:
suffix = "-" + str(i)
ret = base_id + suffix
i += 1
return ret
def pytest_generate_tests(self, metafunc):
if 'yml_test_case' in metafunc.fixturenames:
entries = []
entry_ids = []
entry_set = set()
for filename, _ in zip(*files_and_ids()):
with open(os.path.join(__location__, filename), 'r', encoding='utf-8') as infile:
data = yaml.load(infile, OrderedDictYAMLLoader)
last_expected = None
for string, expected in reversed(list(data.items())):
if expected is None:
data[string] = last_expected
else:
last_expected = expected
default = None
try:
default = data['__default__']
del data['__default__']
except KeyError:
pass
for string, expected in data.items():
TestYml.set_default(expected, default)
string = TestYml.fix_encoding(string, expected)
entries.append((filename, string, expected))
unique_id = self._get_unique_id(entry_set, '[' + filename + '] ' + str(string))
entry_set.add(unique_id)
entry_ids.append(unique_id)
metafunc.parametrize('yml_test_case', entries, ids=entry_ids)
@staticmethod
def set_default(expected, default):
@ -147,34 +177,8 @@ class TestYml(object):
if k not in expected:
expected[k] = v
@pytest.mark.parametrize('filename', files, ids=ids)
def test(self, filename, caplog):
caplog.setLevel(logging.INFO)
with open(os.path.join(__location__, filename), 'r', encoding='utf-8') as infile:
data = yaml.load(infile, OrderedDictYAMLLoader)
entries = Results()
last_expected = None
for string, expected in reversed(list(data.items())):
if expected is None:
data[string] = last_expected
else:
last_expected = expected
default = None
try:
default = data['__default__']
del data['__default__']
except KeyError:
pass
for string, expected in data.items():
TestYml.set_default(expected, default)
entry = self.check_data(filename, string, expected)
entries.append(entry)
entries.assert_ok()
def check_data(self, filename, string, expected):
@classmethod
def fix_encoding(cls, string, expected):
if six.PY2:
if isinstance(string, six.text_type):
string = string.encode('utf-8')
@ -187,16 +191,23 @@ class TestYml(object):
expected[k] = v
if not isinstance(string, str):
string = str(string)
if not string_predicate or string_predicate(string): # pylint: disable=not-callable
entry = self.check(string, expected)
if entry.ok:
logger.debug('[' + filename + '] ' + str(entry))
elif entry.warning:
logger.warning('[' + filename + '] ' + str(entry))
elif entry.error:
logger.error('[' + filename + '] ' + str(entry))
for line in entry.details:
logger.error('[' + filename + '] ' + ' ' * 4 + line)
return string
def test_entry(self, yml_test_case):
filename, string, expected = yml_test_case
result = self.check_data(filename, string, expected)
assert not result.error
def check_data(self, filename, string, expected):
    """Check a single release-name ``string`` against its ``expected`` guesses.

    Delegates to ``self.check`` and logs the outcome at a level matching
    its severity (debug for ok, warning, or error with indented details).

    :param filename: YAML file the test case came from (used as log prefix)
    :param string: release name to parse
    :param expected: expected property mapping
    :return: the entry result produced by ``self.check``
    """
    entry = self.check(string, expected)
    if entry.ok:
        logger.debug('[%s] %s', filename, entry)
    elif entry.warning:
        logger.warning('[%s] %s', filename, entry)
    elif entry.error:
        logger.error('[%s] %s', filename, entry)
        # Detail lines are indented by four spaces for readable log output.
        for line in entry.details:
            logger.error('[%s] %s', filename, ' ' * 4 + line)
    return entry
def check(self, string, expected):
@ -207,12 +218,10 @@ class TestYml(object):
options = {}
if not isinstance(options, dict):
options = parse_options(options)
options['config'] = False
options = load_config(options)
try:
result = guessit(string, options)
except Exception as exc:
logger.error('[' + string + '] Exception: ' + str(exc))
logger.error('[%s] Exception: %s', string, exc)
raise exc
entry = EntryResult(string, negates)
@ -258,10 +267,10 @@ class TestYml(object):
return False
if isinstance(next(iter(values)), babelfish.Language):
# pylint: disable=no-member
expecteds = set([babelfish.Language.fromguessit(expected) for expected in expecteds])
expecteds = {babelfish.Language.fromguessit(expected) for expected in expecteds}
elif isinstance(next(iter(values)), babelfish.Country):
# pylint: disable=no-member
expecteds = set([babelfish.Country.fromguessit(expected) for expected in expecteds])
expecteds = {babelfish.Country.fromguessit(expected) for expected in expecteds}
return values == expecteds
def check_expected(self, result, expected, entry):
@ -274,10 +283,10 @@ class TestYml(object):
if negates_key:
entry.valid.append((expected_key, expected_value))
else:
entry.different.append((expected_key, expected_value, result[expected_key]))
entry.different.append((expected_key, expected_value, result[result_key]))
else:
if negates_key:
entry.different.append((expected_key, expected_value, result[expected_key]))
entry.different.append((expected_key, expected_value, result[result_key]))
else:
entry.valid.append((expected_key, expected_value))
elif not negates_key:

File diff suppressed because it is too large Load diff

View file

@ -3,23 +3,26 @@
"""
Options
"""
try:
from collections import OrderedDict
except ImportError: # pragma: no-cover
from ordereddict import OrderedDict # pylint:disable=import-error
import babelfish
import yaml
import yaml # pylint:disable=wrong-import-order
from .rules.common.quantity import BitRate, FrameRate, Size
class OrderedDictYAMLLoader(yaml.Loader):
class OrderedDictYAMLLoader(yaml.SafeLoader):
"""
A YAML loader that loads mappings into ordered dictionaries.
From https://gist.github.com/enaeseth/844388
"""
def __init__(self, *args, **kwargs):
yaml.Loader.__init__(self, *args, **kwargs)
yaml.SafeLoader.__init__(self, *args, **kwargs)
self.add_constructor(u'tag:yaml.org,2002:map', type(self).construct_yaml_map)
self.add_constructor(u'tag:yaml.org,2002:omap', type(self).construct_yaml_map)
@ -55,17 +58,24 @@ class CustomDumper(yaml.SafeDumper):
"""
Custom YAML Dumper.
"""
pass
pass # pylint:disable=unnecessary-pass
def default_representer(dumper, data):
    """Represent an arbitrary value by dumping its string form.

    :param dumper: the YAML dumper in use
    :param data: value to represent
    :return: a YAML string node for ``str(data)``
    """
    text = str(data)
    return dumper.represent_str(text)
CustomDumper.add_representer(babelfish.Language, default_representer)
CustomDumper.add_representer(babelfish.Country, default_representer)
CustomDumper.add_representer(BitRate, default_representer)
CustomDumper.add_representer(FrameRate, default_representer)
CustomDumper.add_representer(Size, default_representer)
def ordered_dict_representer(dumper, data):
"""OrderedDict representer"""
return dumper.represent_dict(data)
return dumper.represent_mapping('tag:yaml.org,2002:map', data.items())
CustomDumper.add_representer(OrderedDict, ordered_dict_representer)

View file

@ -4,4 +4,4 @@
Version module
"""
# pragma: no cover
__version__ = '0.9.0'
__version__ = '2.0.1'

217
libs/rebulk/builder.py Normal file
View file

@ -0,0 +1,217 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Base builder class for Rebulk
"""
from abc import ABCMeta, abstractmethod
from copy import deepcopy
from logging import getLogger
from six import add_metaclass
from .loose import set_defaults
from .pattern import RePattern, StringPattern, FunctionalPattern
log = getLogger(__name__).log
@add_metaclass(ABCMeta)
class Builder(object):
    """
    Base builder class for patterns.

    Holds per-kind default keyword arguments (regex / string / functional /
    chain plus global defaults) and builds the corresponding Pattern
    objects with those defaults applied.
    """

    def __init__(self):
        # One defaults dict per pattern kind, plus global defaults.
        self._defaults = {}
        self._regex_defaults = {}
        self._string_defaults = {}
        self._functional_defaults = {}
        self._chain_defaults = {}

    def reset(self):
        """
        Reset all defaults.

        :return:
        """
        self.__init__()

    def defaults(self, **kwargs):
        """
        Define default keyword arguments for all patterns.

        :param kwargs: defaults to merge (override existing ones)
        :return: self, for chaining
        """
        set_defaults(kwargs, self._defaults, override=True)
        return self

    def regex_defaults(self, **kwargs):
        """
        Define default keyword arguments for regular expression patterns.

        :param kwargs: defaults to merge (override existing ones)
        :return: self, for chaining
        """
        set_defaults(kwargs, self._regex_defaults, override=True)
        return self

    def string_defaults(self, **kwargs):
        """
        Define default keyword arguments for string patterns.

        :param kwargs: defaults to merge (override existing ones)
        :return: self, for chaining
        """
        set_defaults(kwargs, self._string_defaults, override=True)
        return self

    def functional_defaults(self, **kwargs):
        """
        Define default keyword arguments for functional patterns.

        :param kwargs: defaults to merge (override existing ones)
        :return: self, for chaining
        """
        set_defaults(kwargs, self._functional_defaults, override=True)
        return self

    def chain_defaults(self, **kwargs):
        """
        Define default keyword arguments for patterns chain.

        :param kwargs: defaults to merge (override existing ones)
        :return: self, for chaining
        """
        set_defaults(kwargs, self._chain_defaults, override=True)
        return self

    def build_re(self, *pattern, **kwargs):
        """
        Builds a new regular expression pattern.

        :param pattern: regex pattern definition(s)
        :param kwargs: pattern options (regex defaults, then global defaults, are applied)
        :return: a new RePattern
        """
        set_defaults(self._regex_defaults, kwargs)
        set_defaults(self._defaults, kwargs)
        return RePattern(*pattern, **kwargs)

    def build_string(self, *pattern, **kwargs):
        """
        Builds a new string pattern.

        :param pattern: string pattern definition(s)
        :param kwargs: pattern options (string defaults, then global defaults, are applied)
        :return: a new StringPattern
        """
        set_defaults(self._string_defaults, kwargs)
        set_defaults(self._defaults, kwargs)
        return StringPattern(*pattern, **kwargs)

    def build_functional(self, *pattern, **kwargs):
        """
        Builds a new functional pattern.

        :param pattern: callable pattern definition(s)
        :param kwargs: pattern options (functional defaults, then global defaults, are applied)
        :return: a new FunctionalPattern
        """
        set_defaults(self._functional_defaults, kwargs)
        set_defaults(self._defaults, kwargs)
        return FunctionalPattern(*pattern, **kwargs)

    def build_chain(self, **kwargs):
        """
        Builds a new patterns chain.

        :param kwargs: chain options (chain defaults, then global defaults, are applied)
        :return: a new Chain whose defaults are deep-copied from this builder
        """
        # Imported here to avoid a circular import (chain imports builder).
        from .chain import Chain
        set_defaults(self._chain_defaults, kwargs)
        set_defaults(self._defaults, kwargs)
        chain = Chain(self, **kwargs)
        # The chain gets independent copies so later changes to this
        # builder's defaults don't leak into an already-built chain.
        chain._defaults = deepcopy(self._defaults)  # pylint: disable=protected-access
        chain._regex_defaults = deepcopy(self._regex_defaults)  # pylint: disable=protected-access
        chain._functional_defaults = deepcopy(self._functional_defaults)  # pylint: disable=protected-access
        chain._string_defaults = deepcopy(self._string_defaults)  # pylint: disable=protected-access
        chain._chain_defaults = deepcopy(self._chain_defaults)  # pylint: disable=protected-access
        return chain

    @abstractmethod
    def pattern(self, *pattern):
        """
        Register a list of Pattern instance.

        :param pattern:
        :return:
        """
        pass

    def regex(self, *pattern, **kwargs):
        """
        Add re pattern.

        :param pattern: regex pattern definition(s)
        :return: self
        :rtype: Rebulk
        """
        return self.pattern(self.build_re(*pattern, **kwargs))

    def string(self, *pattern, **kwargs):
        """
        Add string pattern.

        :param pattern: string pattern definition(s)
        :return: self
        :rtype: Rebulk
        """
        return self.pattern(self.build_string(*pattern, **kwargs))

    def functional(self, *pattern, **kwargs):
        """
        Add functional pattern.

        :param pattern: callable pattern definition(s)
        :return: self
        :rtype: Rebulk
        """
        functional = self.build_functional(*pattern, **kwargs)
        return self.pattern(functional)

    def chain(self, **kwargs):
        """
        Add patterns chain, using configuration of this rebulk.

        :param kwargs: chain options
        :return: the newly registered Chain (not self)
        """
        chain = self.build_chain(**kwargs)
        self.pattern(chain)
        return chain

View file

@ -6,9 +6,10 @@ Chain patterns and handle repetiting capture group
# pylint: disable=super-init-not-called
import itertools
from .loose import call, set_defaults
from .builder import Builder
from .loose import call
from .match import Match, Matches
from .pattern import Pattern, filter_match_kwargs
from .pattern import Pattern, filter_match_kwargs, BasePattern
from .remodule import re
@ -19,150 +20,46 @@ class _InvalidChainException(Exception):
pass
class Chain(Pattern):
class Chain(Pattern, Builder):
"""
Definition of a pattern chain to search for.
"""
def __init__(self, rebulk, chain_breaker=None, **kwargs):
call(super(Chain, self).__init__, **kwargs)
def __init__(self, parent, chain_breaker=None, **kwargs):
Builder.__init__(self)
call(Pattern.__init__, self, **kwargs)
self._kwargs = kwargs
self._match_kwargs = filter_match_kwargs(kwargs)
self._defaults = {}
self._regex_defaults = {}
self._string_defaults = {}
self._functional_defaults = {}
if callable(chain_breaker):
self.chain_breaker = chain_breaker
else:
self.chain_breaker = None
self.rebulk = rebulk
self.parent = parent
self.parts = []
def defaults(self, **kwargs):
def pattern(self, *pattern):
"""
Define default keyword arguments for all patterns
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
self._defaults = kwargs
return self
def regex_defaults(self, **kwargs):
"""
Define default keyword arguments for functional patterns.
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
self._regex_defaults = kwargs
return self
def string_defaults(self, **kwargs):
"""
Define default keyword arguments for string patterns.
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
self._string_defaults = kwargs
return self
def functional_defaults(self, **kwargs):
"""
Define default keyword arguments for functional patterns.
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
self._functional_defaults = kwargs
return self
def chain(self):
"""
Add patterns chain, using configuration from this chain
:return:
:rtype:
"""
# pylint: disable=protected-access
chain = self.rebulk.chain(**self._kwargs)
chain._defaults = dict(self._defaults)
chain._regex_defaults = dict(self._regex_defaults)
chain._functional_defaults = dict(self._functional_defaults)
chain._string_defaults = dict(self._string_defaults)
return chain
def regex(self, *pattern, **kwargs):
"""
Add re pattern
:param pattern:
:type pattern:
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
set_defaults(self._kwargs, kwargs)
set_defaults(self._regex_defaults, kwargs)
set_defaults(self._defaults, kwargs)
pattern = self.rebulk.build_re(*pattern, **kwargs)
part = ChainPart(self, pattern)
self.parts.append(part)
return part
def functional(self, *pattern, **kwargs):
"""
Add functional pattern
:param pattern:
:type pattern:
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
set_defaults(self._kwargs, kwargs)
set_defaults(self._functional_defaults, kwargs)
set_defaults(self._defaults, kwargs)
pattern = self.rebulk.build_functional(*pattern, **kwargs)
part = ChainPart(self, pattern)
self.parts.append(part)
return part
def string(self, *pattern, **kwargs):
"""
Add string pattern
:param pattern:
:type pattern:
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
set_defaults(self._kwargs, kwargs)
set_defaults(self._functional_defaults, kwargs)
set_defaults(self._defaults, kwargs)
pattern = self.rebulk.build_string(*pattern, **kwargs)
part = ChainPart(self, pattern)
if not pattern:
raise ValueError("One pattern should be given to the chain")
if len(pattern) > 1:
raise ValueError("Only one pattern can be given to the chain")
part = ChainPart(self, pattern[0])
self.parts.append(part)
return part
def close(self):
"""
Close chain builder to continue registering other pattern
:return:
:rtype:
Deeply close the chain
:return: Rebulk instance
"""
return self.rebulk
parent = self.parent
while isinstance(parent, Chain):
parent = parent.parent
return parent
def _match(self, pattern, input_string, context=None):
# pylint: disable=too-many-locals,too-many-nested-blocks
@ -173,42 +70,20 @@ class Chain(Pattern):
chain_found = False
current_chain_matches = []
valid_chain = True
is_chain_start = True
for chain_part in self.parts:
try:
chain_part_matches, raw_chain_part_matches = Chain._match_chain_part(is_chain_start, chain_part,
chain_input_string,
context)
Chain._fix_matches_offset(chain_part_matches, input_string, offset)
Chain._fix_matches_offset(raw_chain_part_matches, input_string, offset)
if raw_chain_part_matches:
grouped_matches_dict = dict()
for match_index, match in itertools.groupby(chain_part_matches,
lambda m: m.match_index):
grouped_matches_dict[match_index] = list(match)
grouped_raw_matches_dict = dict()
for match_index, raw_match in itertools.groupby(raw_chain_part_matches,
lambda m: m.match_index):
grouped_raw_matches_dict[match_index] = list(raw_match)
for match_index, grouped_raw_matches in grouped_raw_matches_dict.items():
chain_found = True
offset = grouped_raw_matches[-1].raw_end
chain_input_string = input_string[offset:]
if not chain_part.is_hidden:
grouped_matches = grouped_matches_dict.get(match_index, [])
if self._chain_breaker_eval(current_chain_matches + grouped_matches):
current_chain_matches.extend(grouped_matches)
chain_part_matches, raw_chain_part_matches = chain_part.matches(chain_input_string,
context,
with_raw_matches=True)
chain_found, chain_input_string, offset = \
self._to_next_chain_part(chain_part, chain_part_matches, raw_chain_part_matches, chain_found,
input_string, chain_input_string, offset, current_chain_matches)
except _InvalidChainException:
valid_chain = False
if current_chain_matches:
offset = current_chain_matches[0].raw_end
break
is_chain_start = False
if not chain_found:
break
if current_chain_matches and valid_chain:
@ -217,38 +92,66 @@ class Chain(Pattern):
return chain_matches
def _match_parent(self, match, yield_parent):
def _to_next_chain_part(self, chain_part, chain_part_matches, raw_chain_part_matches, chain_found,
input_string, chain_input_string, offset, current_chain_matches):
Chain._fix_matches_offset(chain_part_matches, input_string, offset)
Chain._fix_matches_offset(raw_chain_part_matches, input_string, offset)
if raw_chain_part_matches:
grouped_matches_dict = self._group_by_match_index(chain_part_matches)
grouped_raw_matches_dict = self._group_by_match_index(raw_chain_part_matches)
for match_index, grouped_raw_matches in grouped_raw_matches_dict.items():
chain_found = True
offset = grouped_raw_matches[-1].raw_end
chain_input_string = input_string[offset:]
if not chain_part.is_hidden:
grouped_matches = grouped_matches_dict.get(match_index, [])
if self._chain_breaker_eval(current_chain_matches + grouped_matches):
current_chain_matches.extend(grouped_matches)
return chain_found, chain_input_string, offset
def _process_match(self, match, match_index, child=False):
"""
Handle a parent match
Handle a match
:param match:
:type match:
:param yield_parent:
:type yield_parent:
:param match_index:
:type match_index:
:param child:
:type child:
:return:
:rtype:
"""
ret = super(Chain, self)._match_parent(match, yield_parent)
original_children = Matches(match.children)
original_end = match.end
while not ret and match.children:
last_pattern = match.children[-1].pattern
last_pattern_children = [child for child in match.children if child.pattern == last_pattern]
last_pattern_groups_iter = itertools.groupby(last_pattern_children, lambda child: child.match_index)
last_pattern_groups = {}
for index, matches in last_pattern_groups_iter:
last_pattern_groups[index] = list(matches)
# pylint: disable=too-many-locals
ret = super(Chain, self)._process_match(match, match_index, child=child)
if ret:
return True
for index in reversed(list(last_pattern_groups)):
last_matches = list(last_pattern_groups[index])
for last_match in last_matches:
match.children.remove(last_match)
match.end = match.children[-1].end if match.children else match.start
ret = super(Chain, self)._match_parent(match, yield_parent)
if ret:
return True
match.children = original_children
match.end = original_end
return ret
if match.children:
last_pattern = match.children[-1].pattern
last_pattern_groups = self._group_by_match_index(
[child_ for child_ in match.children if child_.pattern == last_pattern]
)
if last_pattern_groups:
original_children = Matches(match.children)
original_end = match.end
for index in reversed(list(last_pattern_groups)):
last_matches = last_pattern_groups[index]
for last_match in last_matches:
match.children.remove(last_match)
match.end = match.children[-1].end if match.children else match.start
ret = super(Chain, self)._process_match(match, match_index, child=child)
if ret:
return True
match.children = original_children
match.end = original_end
return False
def _build_chain_match(self, current_chain_matches, input_string):
start = None
@ -282,46 +185,11 @@ class Chain(Pattern):
Chain._fix_matches_offset(chain_part_match.children, input_string, offset)
@staticmethod
def _match_chain_part(is_chain_start, chain_part, chain_input_string, context):
chain_part_matches, raw_chain_part_matches = chain_part.pattern.matches(chain_input_string, context,
with_raw_matches=True)
chain_part_matches = Chain._truncate_chain_part_matches(is_chain_start, chain_part_matches, chain_part,
chain_input_string)
raw_chain_part_matches = Chain._truncate_chain_part_matches(is_chain_start, raw_chain_part_matches, chain_part,
chain_input_string)
Chain._validate_chain_part_matches(raw_chain_part_matches, chain_part)
return chain_part_matches, raw_chain_part_matches
@staticmethod
def _truncate_chain_part_matches(is_chain_start, chain_part_matches, chain_part, chain_input_string):
if not chain_part_matches:
return chain_part_matches
if not is_chain_start:
separator = chain_input_string[0:chain_part_matches[0].initiator.raw_start]
if separator:
return []
j = 1
for i in range(0, len(chain_part_matches) - 1):
separator = chain_input_string[chain_part_matches[i].initiator.raw_end:
chain_part_matches[i + 1].initiator.raw_start]
if separator:
break
j += 1
truncated = chain_part_matches[:j]
if chain_part.repeater_end is not None:
truncated = [m for m in truncated if m.match_index < chain_part.repeater_end]
return truncated
@staticmethod
def _validate_chain_part_matches(chain_part_matches, chain_part):
max_match_index = -1
if chain_part_matches:
max_match_index = max([m.match_index for m in chain_part_matches])
if max_match_index + 1 < chain_part.repeater_start:
raise _InvalidChainException
def _group_by_match_index(matches):
grouped_matches_dict = dict()
for match_index, match in itertools.groupby(matches, lambda m: m.match_index):
grouped_matches_dict[match_index] = list(match)
return grouped_matches_dict
@property
def match_options(self):
@ -338,7 +206,7 @@ class Chain(Pattern):
return "<%s%s:%s>" % (self.__class__.__name__, defined, self.parts)
class ChainPart(object):
class ChainPart(BasePattern):
"""
Part of a pattern chain.
"""
@ -350,6 +218,51 @@ class ChainPart(object):
self.repeater_end = 1
self._hidden = False
@property
def _is_chain_start(self):
    # This part starts the chain iff it is the first registered part.
    return self._chain.parts[0] == self
def matches(self, input_string, context=None, with_raw_matches=False):
    """Compute all matches for this chain part.

    Delegates to the underlying pattern, truncates both processed and raw
    matches to this part's repeater bounds, then validates the minimum
    repetition count against the raw matches (``_validate_repeater`` raises
    ``_InvalidChainException`` when it is not reached).

    :param input_string: the string to parse
    :param context: the context
    :param with_raw_matches: when True, also return the raw matches
    :return: matches, or ``(matches, raw_matches)`` if with_raw_matches
    """
    matches, raw_matches = self.pattern.matches(input_string, context=context, with_raw_matches=True)
    matches = self._truncate_repeater(matches, input_string)
    raw_matches = self._truncate_repeater(raw_matches, input_string)
    self._validate_repeater(raw_matches)
    if with_raw_matches:
        return matches, raw_matches
    return matches
def _truncate_repeater(self, matches, input_string):
    """Keep only the leading run of contiguous matches, capped by repeater_end.

    Matches stop being collected as soon as a separator (any text between
    two consecutive matches) is found; for a non-starting chain part, a
    separator before the first match discards everything.
    """
    if not matches:
        return matches
    if not self._is_chain_start:
        # Text before the first match breaks chain continuity.
        separator = input_string[0:matches[0].initiator.raw_start]
        if separator:
            return []
    # j counts how many consecutive (separator-free) matches to keep.
    j = 1
    for i in range(0, len(matches) - 1):
        separator = input_string[matches[i].initiator.raw_end:
                                 matches[i + 1].initiator.raw_start]
        if separator:
            break
        j += 1
    truncated = matches[:j]
    if self.repeater_end is not None:
        # Enforce the repeater upper bound (match_index is 0-based).
        truncated = [m for m in truncated if m.match_index < self.repeater_end]
    return truncated
def _validate_repeater(self, matches):
    """Ensure the repeater lower bound was reached.

    :raises _InvalidChainException: when fewer repetitions than
        ``repeater_start`` were matched.
    """
    max_match_index = -1
    if matches:
        max_match_index = max([m.match_index for m in matches])
    # match_index is 0-based, so max_match_index + 1 is the repetition count.
    if max_match_index + 1 < self.repeater_start:
        raise _InvalidChainException
def chain(self):
"""
Add patterns chain, using configuration from this chain

View file

@ -15,9 +15,19 @@ def formatters(*chained_formatters):
:return:
:rtype:
"""
def formatters_chain(input_string): # pylint:disable=missing-docstring
for chained_formatter in chained_formatters:
input_string = chained_formatter(input_string)
return input_string
return formatters_chain
def default_formatter(input_string):
    """Identity formatter: return the input value unchanged.

    :param input_string: value to format
    :return: the same value, untouched
    """
    result = input_string
    return result

View file

@ -3,7 +3,7 @@
"""
Introspect rebulk object to retrieve capabilities.
"""
from abc import ABCMeta, abstractproperty
from abc import ABCMeta, abstractmethod
from collections import defaultdict
import six
@ -16,7 +16,8 @@ class Description(object):
"""
Abstract class for a description.
"""
@abstractproperty
@property
@abstractmethod
def properties(self): # pragma: no cover
"""
Properties of described object.

View file

@ -3,8 +3,18 @@
"""
Various utilities functions
"""
import inspect
import sys
from inspect import isclass
try:
from inspect import getfullargspec as getargspec
_fullargspec_supported = True
except ImportError:
_fullargspec_supported = False
from inspect import getargspec
from .utils import is_iterable
if sys.version_info < (3, 4, 0): # pragma: no cover
@ -45,8 +55,8 @@ def call(function, *args, **kwargs):
:return: same value as default function call
:rtype: object
"""
func = constructor_args if inspect.isclass(function) else function_args
call_args, call_kwargs = func(function, *args, **kwargs)
func = constructor_args if isclass(function) else function_args
call_args, call_kwargs = func(function, *args, ignore_unused=True, **kwargs) # @see #20
return function(*call_args, **call_kwargs)
@ -63,7 +73,7 @@ def function_args(callable_, *args, **kwargs):
:return: (args, kwargs) matching the function signature
:rtype: tuple
"""
argspec = inspect.getargspec(callable_) # pylint:disable=deprecated-method
argspec = getargspec(callable_) # pylint:disable=deprecated-method
return argspec_args(argspec, False, *args, **kwargs)
@ -80,7 +90,7 @@ def constructor_args(class_, *args, **kwargs):
:return: (args, kwargs) matching the function signature
:rtype: tuple
"""
argspec = inspect.getargspec(_constructor(class_)) # pylint:disable=deprecated-method
argspec = getargspec(_constructor(class_)) # pylint:disable=deprecated-method
return argspec_args(argspec, True, *args, **kwargs)
@ -99,7 +109,7 @@ def argspec_args(argspec, constructor, *args, **kwargs):
:return: (args, kwargs) matching the function signature
:rtype: tuple
"""
if argspec.keywords:
if argspec.varkw:
call_kwarg = kwargs
else:
call_kwarg = dict((k, kwargs[k]) for k in kwargs if k in argspec.args) # Python 2.6 dict comprehension
@ -110,6 +120,36 @@ def argspec_args(argspec, constructor, *args, **kwargs):
return call_args, call_kwarg
if not _fullargspec_supported:
def argspec_args_legacy(argspec, constructor, *args, **kwargs):
"""
Return (args, kwargs) matching the argspec object
:param argspec: argspec to use
:type argspec: argspec
:param constructor: is it a constructor ?
:type constructor: bool
:param args:
:type args:
:param kwargs:
:type kwargs:
:return: (args, kwargs) matching the function signature
:rtype: tuple
"""
if argspec.keywords:
call_kwarg = kwargs
else:
call_kwarg = dict((k, kwargs[k]) for k in kwargs if k in argspec.args) # Python 2.6 dict comprehension
if argspec.varargs:
call_args = args
else:
call_args = args[:len(argspec.args) - (1 if constructor else 0)]
return call_args, call_kwarg
argspec_args = argspec_args_legacy
def ensure_list(param):
"""
Retrieves a list from given parameter.
@ -177,9 +217,12 @@ def filter_index(collection, predicate=None, index=None):
return collection
def set_defaults(defaults, kwargs):
def set_defaults(defaults, kwargs, override=False):
"""
Set defaults from defaults dict to kwargs dict
:param override:
:type override:
:param defaults:
:type defaults:
:param kwargs:
@ -187,12 +230,13 @@ def set_defaults(defaults, kwargs):
:return:
:rtype:
"""
if 'clear' in defaults.keys() and defaults.pop('clear'):
kwargs.clear()
for key, value in defaults.items():
if key not in kwargs and value is not None:
if key in kwargs:
if isinstance(value, list) and isinstance(kwargs[key], list):
kwargs[key] = list(value) + kwargs[key]
elif isinstance(value, dict) and isinstance(kwargs[key], dict):
set_defaults(value, kwargs[key])
if key not in kwargs or override:
kwargs[key] = value
elif isinstance(value, list) and isinstance(kwargs[key], list):
kwargs[key] = list(value) + kwargs[key]
elif isinstance(value, dict) and isinstance(kwargs[key], dict):
set_defaults(value, kwargs[key])
elif key in kwargs and value is None:
kwargs[key] = None

View file

@ -5,7 +5,11 @@ Classes and functions related to matches
"""
import copy
import itertools
from collections import defaultdict, MutableSequence
from collections import defaultdict
try:
from collections.abc import MutableSequence
except ImportError:
from collections import MutableSequence
try:
from collections import OrderedDict # pylint:disable=ungrouped-imports
@ -778,9 +782,9 @@ class Match(object):
right.start = end
if right:
ret.append(right)
elif end <= current.end and end > current.start:
elif current.end >= end > current.start:
current.start = end
elif start >= current.start and start < current.end:
elif current.start <= start < current.end:
current.end = start
return filter_index(ret, predicate, index)
@ -811,6 +815,24 @@ class Match(object):
return filter_index(ret, predicate, index)
def tagged(self, *tags):
    """
    Check if this match has at least one of the provided tags

    :param tags: tag names to look for
    :return: True if at least one tag is defined, False otherwise.
    """
    for candidate in tags:
        if candidate in self.tags:
            return True
    return False
def named(self, *names):
    """
    Check if one of the children match has one of the provided name

    :param names: names to look for
    :return: True if at least one child is named with a given name is defined, False otherwise.
    """
    for candidate in names:
        if candidate in self.names:
            return True
    return False
def __len__(self):
return self.end - self.start

View file

@ -10,14 +10,39 @@ from abc import ABCMeta, abstractmethod, abstractproperty
import six
from . import debug
from .formatters import default_formatter
from .loose import call, ensure_list, ensure_dict
from .match import Match
from .remodule import re, REGEX_AVAILABLE
from .utils import find_all, is_iterable, get_first_defined
from .validators import allways_true
@six.add_metaclass(ABCMeta)
class Pattern(object):
class BasePattern(object):
"""
Base class for Pattern like objects
"""
@abstractmethod
def matches(self, input_string, context=None, with_raw_matches=False):
"""
Computes all matches for a given input
:param input_string: the string to parse
:type input_string: str
:param context: the context
:type context: dict
:param with_raw_matches: should return details
:type with_raw_matches: dict
:return: matches based on input_string for this pattern
:rtype: iterator[Match]
"""
pass
@six.add_metaclass(ABCMeta)
class Pattern(BasePattern):
"""
Definition of a particular pattern to search for.
"""
@ -25,7 +50,7 @@ class Pattern(object):
def __init__(self, name=None, tags=None, formatter=None, value=None, validator=None, children=False, every=False,
private_parent=False, private_children=False, private=False, private_names=None, ignore_names=None,
marker=False, format_all=False, validate_all=False, disabled=lambda context: False, log_level=None,
properties=None, post_processor=None, **kwargs):
properties=None, post_processor=None, pre_match_processor=None, post_match_processor=None, **kwargs):
"""
:param name: Name of this pattern
:type name: str
@ -66,15 +91,19 @@ class Pattern(object):
:type disabled: bool|function
:param log_lvl: Log level associated to this pattern
:type log_lvl: int
:param post_process: Post processing function
:param post_processor: Post processing function
:type post_processor: func
:param pre_match_processor: Pre match processing function
:type pre_match_processor: func
:param post_match_processor: Post match processing function
:type post_match_processor: func
"""
# pylint:disable=too-many-locals,unused-argument
self.name = name
self.tags = ensure_list(tags)
self.formatters, self._default_formatter = ensure_dict(formatter, lambda x: x)
self.formatters, self._default_formatter = ensure_dict(formatter, default_formatter)
self.values, self._default_value = ensure_dict(value, None)
self.validators, self._default_validator = ensure_dict(validator, lambda match: True)
self.validators, self._default_validator = ensure_dict(validator, allways_true)
self.every = every
self.children = children
self.private = private
@ -96,6 +125,14 @@ class Pattern(object):
self.post_processor = None
else:
self.post_processor = post_processor
if not callable(pre_match_processor):
self.pre_match_processor = None
else:
self.pre_match_processor = pre_match_processor
if not callable(post_match_processor):
self.post_match_processor = None
else:
self.post_match_processor = post_match_processor
@property
def log_level(self):
@ -106,83 +143,6 @@ class Pattern(object):
"""
return self._log_level if self._log_level is not None else debug.LOG_LEVEL
def _yield_children(self, match):
"""
Does this match has children
:param match:
:type match:
:return:
:rtype:
"""
return match.children and (self.children or self.every)
def _yield_parent(self):
"""
Does this mat
:param match:
:type match:
:return:
:rtype:
"""
return not self.children or self.every
def _match_parent(self, match, yield_parent):
"""
Handle a parent match
:param match:
:type match:
:param yield_parent:
:type yield_parent:
:return:
:rtype:
"""
if not match or match.value == "":
return False
pattern_value = get_first_defined(self.values, [match.name, '__parent__', None],
self._default_value)
if pattern_value:
match.value = pattern_value
if yield_parent or self.format_all:
match.formatter = get_first_defined(self.formatters, [match.name, '__parent__', None],
self._default_formatter)
if yield_parent or self.validate_all:
validator = get_first_defined(self.validators, [match.name, '__parent__', None],
self._default_validator)
if validator and not validator(match):
return False
return True
def _match_child(self, child, yield_children):
"""
Handle a children match
:param child:
:type child:
:param yield_children:
:type yield_children:
:return:
:rtype:
"""
if not child or child.value == "":
return False
pattern_value = get_first_defined(self.values, [child.name, '__children__', None],
self._default_value)
if pattern_value:
child.value = pattern_value
if yield_children or self.format_all:
child.formatter = get_first_defined(self.formatters, [child.name, '__children__', None],
self._default_formatter)
if yield_children or self.validate_all:
validator = get_first_defined(self.validators, [child.name, '__children__', None],
self._default_validator)
if validator and not validator(child):
return False
return True
def matches(self, input_string, context=None, with_raw_matches=False):
"""
Computes all matches for a given input
@ -200,41 +160,168 @@ class Pattern(object):
matches = []
raw_matches = []
for pattern in self.patterns:
yield_parent = self._yield_parent()
match_index = -1
match_index = 0
for match in self._match(pattern, input_string, context):
match_index += 1
match.match_index = match_index
raw_matches.append(match)
yield_children = self._yield_children(match)
if not self._match_parent(match, yield_parent):
continue
validated = True
for child in match.children:
if not self._match_child(child, yield_children):
validated = False
break
if validated:
if self.private_parent:
match.private = True
if self.private_children:
for child in match.children:
child.private = True
if yield_parent or self.private_parent:
matches.append(match)
if yield_children or self.private_children:
for child in match.children:
child.match_index = match_index
matches.append(child)
matches = self._matches_post_process(matches)
self._matches_privatize(matches)
self._matches_ignore(matches)
matches.extend(self._process_matches(match, match_index))
match_index += 1
matches = self._post_process_matches(matches)
if with_raw_matches:
return matches, raw_matches
return matches
def _matches_post_process(self, matches):
@property
def _should_include_children(self):
    """
    Whether children matches from this pattern belong in the results.

    :return: truthy when children are requested, either via ``children``
        or via ``every``.
    """
    if self.children:
        return self.children
    return self.every
@property
def _should_include_parent(self):
    """
    Whether the parent match from this pattern belongs in the results.

    :return: truthy unless only children were requested without ``every``.
    """
    if self.children:
        return self.every
    return True
@staticmethod
def _match_config_property_keys(match, child=False):
    """
    Yield the configuration lookup keys for a match, most specific first.

    Order: the match name (when set), then the ``'__children__'`` or
    ``'__parent__'`` group key, then ``None`` (the unnamed default).

    :param match: match whose configuration keys are wanted.
    :param child: True when ``match`` is a child match.
    :return: generator of lookup keys.
    """
    if match.name:
        yield match.name
    yield '__children__' if child else '__parent__'
    yield None
@staticmethod
def _process_match_index(match, match_index):
    """
    Record the pattern-relative index on the given match.

    :param match: match to tag.
    :param match_index: zero-based index of the match within this pattern.
    """
    match.match_index = match_index
def _process_match_private(self, match, child=False):
    """
    Flag the match as private when this pattern's configuration says so.

    A match becomes private when its name is listed in ``private_names``,
    or when the whole parent (``private_parent``) / all children
    (``private_children``) are configured as private.

    :param match: match to process.
    :param child: True when ``match`` is a child match.
    """
    named_private = bool(match.name) and match.name in self.private_names
    group_private = self.private_children if child else self.private_parent
    if named_private or group_private:
        match.private = True
def _process_match_value(self, match, child=False):
    """
    Override the match value with the one configured for this pattern, if any.

    :param match: match to process.
    :param child: True when ``match`` is a child match.
    """
    keys = self._match_config_property_keys(match, child=child)
    # First defined value wins: match name, group key, then unnamed default.
    pattern_value = get_first_defined(self.values, keys, self._default_value)
    if pattern_value:
        match.value = pattern_value
def _process_match_formatter(self, match, child=False):
    """
    Attach the configured formatter to the match.

    Only done when the match will be part of the results, or when
    ``format_all`` forces formatting of every match.

    :param match: match to process.
    :param child: True when ``match`` is a child match.
    """
    included = self._should_include_children if child else self._should_include_parent
    if included or self.format_all:
        keys = self._match_config_property_keys(match, child=child)
        match.formatter = get_first_defined(self.formatters, keys, self._default_formatter)
def _process_match_validator(self, match, child=False):
    """
    Run the configured validator against the match.

    Validation only happens when the match will be part of the results, or
    when ``validate_all`` forces validation of every match.

    :param match: match to validate.
    :param child: True when ``match`` is a child match.
    :return: True if match is validated by the configured validator
        (or no validator applies), False otherwise.
    """
    included = self._should_include_children if child else self._should_include_parent
    if included or self.validate_all:
        keys = self._match_config_property_keys(match, child=child)
        validator = get_first_defined(self.validators, keys, self._default_validator)
        if validator and not validator(match):
            return False
    return True
def _process_match(self, match, match_index, child=False):
    """
    Apply this pattern's full configuration to a match: index, privacy,
    value and formatter, then validate it.

    :param match: match to process.
    :param match_index: zero-based index of the match within this pattern.
    :param child: True when ``match`` is a child match.
    :return: True if the match passes validation, False otherwise.
    """
    self._process_match_index(match, match_index)
    # Each configuration step takes (match, child) and mutates the match.
    for configure in (self._process_match_private,
                      self._process_match_value,
                      self._process_match_formatter):
        configure(match, child)
    return self._process_match_validator(match, child)
@staticmethod
def _process_match_processor(match, processor):
    """
    Run an optional match processor and return its result.

    :param match: match to process.
    :param processor: optional callable; may return a replacement match,
        ``None`` to keep the original, or a falsy value to discard it.
    :return: the processor result when it is not ``None``, else the
        original match.
    """
    if not processor:
        return match
    result = processor(match)
    return match if result is None else result
def _process_matches(self, match, match_index):
    """
    Process a raw match and generate every match to dispatch in the results.

    Pre/post match processors may replace or discard the match; the parent
    and each of its children must pass this pattern's processing and
    validation. Matches whose name appears in ``ignore_names`` are filtered
    out of the results.

    :param match: unprocessed match.
    :param match_index: zero-based index of the match within this pattern.
    :return: generator of processed matches (parent and/or children).
    """
    match = self._process_match_processor(match, self.pre_match_processor)
    if not match or not self._process_match(match, match_index):
        return
    # A single failing child invalidates the whole match.
    if not all(self._process_match(child, match_index, child=True)
               for child in match.children):
        return
    match = self._process_match_processor(match, self.post_match_processor)
    if not match:
        return
    ignored = self.ignore_names
    if (self._should_include_parent or self.private_parent) and match.name not in ignored:
        yield match
    if self._should_include_children or self.private_children:
        for child in match.children:
            if child.name not in ignored:
                yield child
def _post_process_matches(self, matches):
"""
Post process matches with user defined function
:param matches:
@ -246,32 +333,6 @@ class Pattern(object):
return self.post_processor(matches, self)
return matches
def _matches_privatize(self, matches):
    """
    Flag as private every match whose name is listed in ``private_names``.

    :param matches: matches to scan.
    """
    if not self.private_names:
        return
    for candidate in matches:
        if candidate.name in self.private_names:
            candidate.private = True
def _matches_ignore(self, matches):
    """
    Remove from ``matches`` every match whose name is in ``ignore_names``.

    Iterates over a snapshot so removal is safe while scanning.

    :param matches: matches to filter in place.
    """
    if not self.ignore_names:
        return
    for candidate in list(matches):
        if candidate.name in self.ignore_names:
            matches.remove(candidate)
@abstractproperty
def patterns(self): # pragma: no cover
"""
@ -306,7 +367,7 @@ class Pattern(object):
@abstractmethod
def _match(self, pattern, input_string, context=None): # pragma: no cover
"""
Computes all matches for a given pattern and input
Computes all unprocess matches for a given pattern and input.
:param pattern: the pattern to use
:param input_string: the string to parse
@ -350,7 +411,9 @@ class StringPattern(Pattern):
def _match(self, pattern, input_string, context=None):
for index in find_all(input_string, pattern, **self._kwargs):
yield Match(index, index + len(pattern), pattern=self, input_string=input_string, **self._match_kwargs)
match = Match(index, index + len(pattern), pattern=self, input_string=input_string, **self._match_kwargs)
if match:
yield match
class RePattern(Pattern):
@ -411,15 +474,18 @@ class RePattern(Pattern):
for start, end in match_object.spans(i):
child_match = Match(start, end, name=name, parent=main_match, pattern=self,
input_string=input_string, **self._children_match_kwargs)
main_match.children.append(child_match)
if child_match:
main_match.children.append(child_match)
else:
start, end = match_object.span(i)
if start > -1 and end > -1:
child_match = Match(start, end, name=name, parent=main_match, pattern=self,
input_string=input_string, **self._children_match_kwargs)
main_match.children.append(child_match)
if child_match:
main_match.children.append(child_match)
yield main_match
if main_match:
yield main_match
class FunctionalPattern(Pattern):
@ -457,14 +523,18 @@ class FunctionalPattern(Pattern):
if self._match_kwargs:
options = self._match_kwargs.copy()
options.update(args)
yield Match(pattern=self, input_string=input_string, **options)
match = Match(pattern=self, input_string=input_string, **options)
if match:
yield match
else:
kwargs = self._match_kwargs
if isinstance(args[-1], dict):
kwargs = dict(kwargs)
kwargs.update(args[-1])
args = args[:-1]
yield Match(*args, pattern=self, input_string=input_string, **kwargs)
match = Match(*args, pattern=self, input_string=input_string, **kwargs)
if match:
yield match
def filter_match_kwargs(kwargs, children=False):

View file

@ -30,7 +30,7 @@ def _default_conflict_solver(match, conflicting_match):
"""
if len(conflicting_match.initiator) < len(match.initiator):
return conflicting_match
elif len(match.initiator) < len(conflicting_match.initiator):
if len(match.initiator) < len(conflicting_match.initiator):
return match
return None

View file

@ -5,20 +5,16 @@ Entry point functions and classes for Rebulk
"""
from logging import getLogger
from .builder import Builder
from .match import Matches
from .pattern import RePattern, StringPattern, FunctionalPattern
from .chain import Chain
from .processors import ConflictSolver, PrivateRemover
from .loose import set_defaults
from .utils import extend_safe
from .rules import Rules
from .utils import extend_safe
log = getLogger(__name__).log
class Rebulk(object):
class Rebulk(Builder):
r"""
Regular expression, string and function based patterns are declared in a ``Rebulk`` object. It use a fluent API to
chain ``string``, ``regex``, and ``functional`` methods to define various patterns types.
@ -44,6 +40,7 @@ class Rebulk(object):
>>> bulk.matches("the lakers are from la")
[<lakers:(4, 10)>, <la:(20, 22)>]
"""
# pylint:disable=protected-access
def __init__(self, disabled=lambda context: False, default_rules=True):
@ -56,6 +53,7 @@ class Rebulk(object):
:return:
:rtype:
"""
super(Rebulk, self).__init__()
if not callable(disabled):
self.disabled = lambda context: disabled
else:
@ -64,11 +62,6 @@ class Rebulk(object):
self._rules = Rules()
if default_rules:
self.rules(ConflictSolver, PrivateRemover)
self._defaults = {}
self._regex_defaults = {}
self._string_defaults = {}
self._functional_defaults = {}
self._chain_defaults = {}
self._rebulks = []
def pattern(self, *pattern):
@ -83,172 +76,6 @@ class Rebulk(object):
self._patterns.extend(pattern)
return self
def defaults(self, **kwargs):
    """
    Define default keyword arguments applied to every pattern of this rebulk.

    Note: each call replaces (does not merge with) previous defaults.

    :param kwargs: default keyword arguments.
    :return: self, for fluent chaining.
    :rtype: Rebulk
    """
    self._defaults = kwargs
    return self
def regex_defaults(self, **kwargs):
"""
Define default keyword arguments for functional patterns.
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
self._regex_defaults = kwargs
return self
def regex(self, *pattern, **kwargs):
    """
    Register a new regular expression pattern on this rebulk.

    :param pattern: regex pattern definitions.
    :param kwargs: pattern options.
    :return: self, for fluent chaining.
    :rtype: Rebulk
    """
    built = self.build_re(*pattern, **kwargs)
    self.pattern(built)
    return self
def build_re(self, *pattern, **kwargs):
    """
    Build a new regular expression pattern, applying regex then global defaults.

    :param pattern: regex pattern definitions.
    :param kwargs: pattern options; missing keys are filled first from
        ``regex_defaults``, then from the global ``defaults``.
    :return: the built pattern.
    :rtype: RePattern
    """
    set_defaults(self._regex_defaults, kwargs)
    set_defaults(self._defaults, kwargs)
    return RePattern(*pattern, **kwargs)
def string_defaults(self, **kwargs):
"""
Define default keyword arguments for string patterns.
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
self._string_defaults = kwargs
return self
def string(self, *pattern, **kwargs):
"""
Add string pattern
:param pattern:
:type pattern:
:return: self
:rtype: Rebulk
"""
self.pattern(self.build_string(*pattern, **kwargs))
return self
def build_string(self, *pattern, **kwargs):
"""
Builds a new string pattern
:param pattern:
:type pattern:
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
set_defaults(self._string_defaults, kwargs)
set_defaults(self._defaults, kwargs)
return StringPattern(*pattern, **kwargs)
def functional_defaults(self, **kwargs):
"""
Define default keyword arguments for functional patterns.
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
self._functional_defaults = kwargs
return self
def functional(self, *pattern, **kwargs):
"""
Add functional pattern
:param pattern:
:type pattern:
:return: self
:rtype: Rebulk
"""
self.pattern(self.build_functional(*pattern, **kwargs))
return self
def build_functional(self, *pattern, **kwargs):
"""
Builds a new functional pattern
:param pattern:
:type pattern:
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
set_defaults(self._functional_defaults, kwargs)
set_defaults(self._defaults, kwargs)
return FunctionalPattern(*pattern, **kwargs)
def chain_defaults(self, **kwargs):
"""
Define default keyword arguments for patterns chain.
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
self._chain_defaults = kwargs
return self
def chain(self, **kwargs):
    """
    Register a new patterns chain on this rebulk and return it.

    Unlike the other builders, the chain itself is returned (not ``self``)
    so its own fluent API can be used to add chain parts.

    :param kwargs: chain options.
    :return: the newly built chain.
    :rtype: Chain
    """
    built = self.build_chain(**kwargs)
    self._patterns.append(built)
    return built
def build_chain(self, **kwargs):
"""
Builds a new patterns chain
:param pattern:
:type pattern:
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
set_defaults(self._chain_defaults, kwargs)
set_defaults(self._defaults, kwargs)
return Chain(self, **kwargs)
def rules(self, *rules):
"""
Add rules as a module, class or instance.

View file

@ -140,10 +140,9 @@ class RemoveMatch(Consequence): # pylint: disable=abstract-method
matches.remove(match)
ret.append(match)
return ret
else:
if when_response in matches:
matches.remove(when_response)
return when_response
if when_response in matches:
matches.remove(when_response)
return when_response
class AppendMatch(Consequence): # pylint: disable=abstract-method
@ -164,12 +163,11 @@ class AppendMatch(Consequence): # pylint: disable=abstract-method
matches.append(match)
ret.append(match)
return ret
else:
if self.match_name:
when_response.name = self.match_name
if when_response not in matches:
matches.append(when_response)
return when_response
if self.match_name:
when_response.name = self.match_name
if when_response not in matches:
matches.append(when_response)
return when_response
class RenameMatch(Consequence): # pylint: disable=abstract-method

View file

@ -2,11 +2,11 @@
# -*- coding: utf-8 -*-
# pylint: disable=no-self-use, pointless-statement, missing-docstring, no-member, len-as-condition
import re
from functools import partial
from rebulk.pattern import FunctionalPattern, StringPattern, RePattern
from ..rebulk import Rebulk
from ..validators import chars_surround
from ..rebulk import Rebulk, FunctionalPattern, RePattern, StringPattern
def test_chain_close():
@ -63,18 +63,61 @@ def test_build_chain():
def test_chain_defaults():
rebulk = Rebulk()
rebulk.defaults(validator=lambda x: True, ignore_names=['testIgnore'], children=True)
rebulk.defaults(validator=lambda x: x.value.startswith('t'), ignore_names=['testIgnore'], children=True)
rebulk.chain()\
rebulk.chain() \
.regex("(?P<test>test)") \
.regex(" ").repeater("*") \
.regex("(?P<best>best)") \
.regex(" ").repeater("*") \
.regex("(?P<testIgnore>testIgnore)")
matches = rebulk.matches("test testIgnore")
matches = rebulk.matches("test best testIgnore")
assert len(matches) == 1
assert matches[0].name == "test"
def test_chain_with_validators():
    """Chain parts may override both parent and per-part validators."""
    def chain_validator(match):
        # Accepts the full chain value: "testing best" starts with 't', ends with 't'.
        return match.value.startswith('t') and match.value.endswith('t')

    def default_validator(match):
        # Accepts "testing": starts with 't', ends with 'g'.
        return match.value.startswith('t') and match.value.endswith('g')

    def custom_validator(match):
        # Accepts "best": starts with 'b', ends with 't'.
        return match.value.startswith('b') and match.value.endswith('t')

    rebulk = Rebulk()
    rebulk.defaults(children=True, validator=default_validator)

    # validate_all forces validation of every part; '__parent__' targets the
    # whole chain match while each part keeps its own validator.
    rebulk.chain(validate_all=True, validator={'__parent__': chain_validator}) \
        .regex("(?P<test>testing)", validator=default_validator).repeater("+") \
        .regex(" ").repeater("+") \
        .regex("(?P<best>best)", validator=custom_validator).repeater("+")

    matches = rebulk.matches("some testing best end")
    assert len(matches) == 2

    assert matches[0].name == "test"
    assert matches[1].name == "best"
def test_matches_docs():
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE) \
.defaults(children=True, formatter={'episode': int, 'version': int}) \
.chain() \
.regex(r'e(?P<episode>\d{1,4})').repeater(1) \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'[ex-](?P<episode>\d{1,4})').repeater('*') \
.close() # .repeater(1) could be omitted as it's the default behavior
result = rebulk.matches("This is E14v2-15-16-17").to_dict() # converts matches to dict
assert 'episode' in result
assert result['episode'] == [14, 15, 16, 17]
assert 'version' in result
assert result['version'] == 2
def test_matches():
rebulk = Rebulk()
@ -144,8 +187,8 @@ def test_matches():
def test_matches_2():
rebulk = Rebulk() \
.regex_defaults(flags=re.IGNORECASE) \
.chain(children=True, formatter={'episode': int}) \
.defaults(formatter={'version': int}) \
.defaults(children=True, formatter={'episode': int, 'version': int}) \
.chain() \
.regex(r'e(?P<episode>\d{1,4})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'[ex-](?P<episode>\d{1,4})').repeater('*') \
@ -173,25 +216,32 @@ def test_matches_2():
def test_matches_3():
alt_dash = (r'@', r'[\W_]') # abbreviation
rebulk = Rebulk()
match_names = ['season', 'episode']
other_names = ['screen_size', 'video_codec', 'audio_codec', 'audio_channels', 'container', 'date']
rebulk.chain(formatter={'season': int, 'episode': int},
tags=['SxxExx'],
abbreviations=[alt_dash],
private_names=['episodeSeparator', 'seasonSeparator'],
children=True,
private_parent=True,
conflict_solver=lambda match, other: match
if match.name in ['season', 'episode'] and other.name in
['screen_size', 'video_codec', 'audio_codec',
'audio_channels', 'container', 'date']
else '__default__') \
rebulk = Rebulk()
rebulk.defaults(formatter={'season': int, 'episode': int},
tags=['SxxExx'],
abbreviations=[alt_dash],
private_names=['episodeSeparator', 'seasonSeparator'],
children=True,
private_parent=True,
conflict_solver=lambda match, other: match
if match.name in match_names and other.name in other_names
else '__default__')
rebulk.chain() \
.defaults(children=True, private_parent=True) \
.regex(r'(?P<season>\d+)@?x@?(?P<episode>\d+)') \
.regex(r'(?P<episodeSeparator>x|-|\+|&)(?P<episode>\d+)').repeater('*') \
.close() \
.chain() \
.defaults(children=True, private_parent=True) \
.regex(r'S(?P<season>\d+)@?(?:xE|Ex|E|x)@?(?P<episode>\d+)') \
.regex(r'(?:(?P<episodeSeparator>xE|Ex|E|x|-|\+|&)(?P<episode>\d+))').repeater('*') \
.close() \
.chain() \
.defaults(children=True, private_parent=True) \
.regex(r'S(?P<season>\d+)') \
.regex(r'(?P<seasonSeparator>S|-|\+|&)(?P<season>\d+)').repeater('*')
@ -240,11 +290,11 @@ def test_matches_4():
rebulk = Rebulk()
rebulk.regex_defaults(flags=re.IGNORECASE)
rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True,
validator={'__parent__': seps_surround}, children=True, private_parent=True)
rebulk.defaults(validate_all=True, children=True)
rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'], private_parent=True)
rebulk.chain(formatter={'episode': int, 'version': int}) \
.defaults(validator=None) \
rebulk.chain(validator={'__parent__': seps_surround}, formatter={'episode': int, 'version': int}) \
.defaults(formatter={'episode': int, 'version': int}) \
.regex(r'e(?P<episode>\d{1,4})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('*')
@ -262,11 +312,11 @@ def test_matches_5():
rebulk = Rebulk()
rebulk.regex_defaults(flags=re.IGNORECASE)
rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True,
validator={'__parent__': seps_surround}, children=True, private_parent=True)
rebulk.chain(formatter={'episode': int, 'version': int}) \
.defaults(validator=None) \
rebulk.chain(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True,
validator={'__parent__': seps_surround}, children=True, private_parent=True,
formatter={'episode': int, 'version': int}) \
.defaults(children=True, private_parent=True) \
.regex(r'e(?P<episode>\d{1,4})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('{2,3}')
@ -288,7 +338,7 @@ def test_matches_6():
validator=None, children=True, private_parent=True)
rebulk.chain(formatter={'episode': int, 'version': int}) \
.defaults(validator=None) \
.defaults(children=True, private_parent=True) \
.regex(r'e(?P<episode>\d{1,4})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('{2,3}')

View file

@ -2,19 +2,15 @@
# -*- coding: utf-8 -*-
# pylint: disable=no-self-use, pointless-statement, missing-docstring, protected-access, invalid-name, len-as-condition
from .default_rules_module import RuleRemove0
from .. import debug
from ..match import Match
from ..pattern import StringPattern
from ..rebulk import Rebulk
from ..match import Match
from .. import debug
from .default_rules_module import RuleRemove0
class TestDebug(object):
#request.addfinalizer(disable_debug)
# request.addfinalizer(disable_debug)
debug.DEBUG = True
pattern = StringPattern(1, 3, value="es")
@ -38,43 +34,43 @@ class TestDebug(object):
debug.DEBUG = False
def test_pattern(self):
assert self.pattern.defined_at.lineno == 20
assert self.pattern.defined_at.lineno > 0
assert self.pattern.defined_at.name == 'rebulk.test.test_debug'
assert self.pattern.defined_at.filename.endswith('test_debug.py')
assert str(self.pattern.defined_at) == 'test_debug.py#L20'
assert repr(self.pattern) == '<StringPattern@test_debug.py#L20:(1, 3)>'
assert str(self.pattern.defined_at).startswith('test_debug.py#L')
assert repr(self.pattern).startswith('<StringPattern@test_debug.py#L')
def test_match(self):
assert self.match.defined_at.lineno == 22
assert self.match.defined_at.lineno > 0
assert self.match.defined_at.name == 'rebulk.test.test_debug'
assert self.match.defined_at.filename.endswith('test_debug.py')
assert str(self.match.defined_at) == 'test_debug.py#L22'
assert str(self.match.defined_at).startswith('test_debug.py#L')
def test_rule(self):
assert self.rule.defined_at.lineno == 23
assert self.rule.defined_at.lineno > 0
assert self.rule.defined_at.name == 'rebulk.test.test_debug'
assert self.rule.defined_at.filename.endswith('test_debug.py')
assert str(self.rule.defined_at) == 'test_debug.py#L23'
assert repr(self.rule) == '<RuleRemove0@test_debug.py#L23>'
assert str(self.rule.defined_at).startswith('test_debug.py#L')
assert repr(self.rule).startswith('<RuleRemove0@test_debug.py#L')
def test_rebulk(self):
"""
This test fails on travis CI, can't find out why there's 1 line offset ...
"""
assert self.rebulk._patterns[0].defined_at.lineno in [26, 27]
assert self.rebulk._patterns[0].defined_at.lineno > 0
assert self.rebulk._patterns[0].defined_at.name == 'rebulk.test.test_debug'
assert self.rebulk._patterns[0].defined_at.filename.endswith('test_debug.py')
assert str(self.rebulk._patterns[0].defined_at) in ['test_debug.py#L26', 'test_debug.py#L27']
assert str(self.rebulk._patterns[0].defined_at).startswith('test_debug.py#L')
assert self.rebulk._patterns[1].defined_at.lineno in [27, 28]
assert self.rebulk._patterns[1].defined_at.lineno > 0
assert self.rebulk._patterns[1].defined_at.name == 'rebulk.test.test_debug'
assert self.rebulk._patterns[1].defined_at.filename.endswith('test_debug.py')
assert str(self.rebulk._patterns[1].defined_at) in ['test_debug.py#L27', 'test_debug.py#L28']
assert str(self.rebulk._patterns[1].defined_at).startswith('test_debug.py#L')
assert self.matches[0].defined_at == self.rebulk._patterns[0].defined_at
assert self.matches[1].defined_at == self.rebulk._patterns[1].defined_at

View file

@ -116,6 +116,9 @@ class TestMatchesClass(object):
assert "tag1" in matches.tags
assert "tag2" in matches.tags
assert self.match3.tagged("tag1")
assert not self.match3.tagged("start")
tag1 = matches.tagged("tag1")
assert len(tag1) == 2
assert tag1[0] == self.match2

View file

@ -3,7 +3,10 @@
"""
Various utilities functions
"""
from collections import MutableSet
try:
from collections.abc import MutableSet
except ImportError:
from collections import MutableSet
from types import GeneratorType

View file

@ -62,9 +62,20 @@ def validators(*chained_validators):
:return:
:rtype:
"""
def validator_chain(match): # pylint:disable=missing-docstring
for chained_validator in chained_validators:
if not chained_validator(match):
return False
return True
return validator_chain
def allways_true(match):  # pylint:disable=unused-argument
    """
    A validator which is always true, whatever the match.

    NOTE: the function name keeps the historical "allways" spelling for
    backward compatibility with existing callers.

    :param match: match to validate (ignored).
    :return: True
    """
    return True

View file

@ -73,9 +73,9 @@ class Addic7edSubtitle(Subtitle):
# resolution
if video.resolution and self.version and video.resolution in self.version.lower():
matches.add('resolution')
# format
if video.format and self.version and video.format.lower() in self.version.lower():
matches.add('format')
# source
if video.source and self.version and video.source.lower() in self.version.lower():
matches.add('source')
# other properties
matches |= guess_matches(video, guessit(self.version), partial=True)

View file

@ -46,13 +46,13 @@ def refine(video, embedded_subtitles=True, **kwargs):
# video codec
if video_track.codec_id == 'V_MPEG4/ISO/AVC':
video.video_codec = 'h264'
video.video_codec = 'H.264'
logger.debug('Found video_codec %s', video.video_codec)
elif video_track.codec_id == 'V_MPEG4/ISO/SP':
video.video_codec = 'DivX'
logger.debug('Found video_codec %s', video.video_codec)
elif video_track.codec_id == 'V_MPEG4/ISO/ASP':
video.video_codec = 'XviD'
video.video_codec = 'Xvid'
logger.debug('Found video_codec %s', video.video_codec)
else:
logger.warning('MKV has no video track')
@ -62,7 +62,7 @@ def refine(video, embedded_subtitles=True, **kwargs):
audio_track = mkv.audio_tracks[0]
# audio codec
if audio_track.codec_id == 'A_AC3':
video.audio_codec = 'AC3'
video.audio_codec = 'Dolby Digital'
logger.debug('Found audio_codec %s', video.audio_codec)
elif audio_track.codec_id == 'A_DTS':
video.audio_codec = 'DTS'

View file

@ -17,7 +17,7 @@ Available matches:
* season
* episode
* release_group
* format
* source
* audio_codec
* resolution
* hearing_impaired
@ -38,11 +38,11 @@ logger = logging.getLogger(__name__)
#: Scores for episodes
episode_scores = {'hash': 359, 'series': 180, 'year': 90, 'season': 30, 'episode': 30, 'release_group': 15,
'format': 7, 'audio_codec': 3, 'resolution': 2, 'video_codec': 2, 'hearing_impaired': 1}
'source': 7, 'audio_codec': 3, 'resolution': 2, 'video_codec': 2, 'hearing_impaired': 1}
#: Scores for movies
movie_scores = {'hash': 119, 'title': 60, 'year': 30, 'release_group': 15,
'format': 7, 'audio_codec': 3, 'resolution': 2, 'video_codec': 2, 'hearing_impaired': 1}
'source': 7, 'audio_codec': 3, 'resolution': 2, 'video_codec': 2, 'hearing_impaired': 1}
#: Equivalent release groups
equivalent_release_groups = ({'LOL', 'DIMENSION'}, {'ASAP', 'IMMERSE', 'FLEET'}, {'AVS', 'SVA'})
@ -153,30 +153,30 @@ def solve_episode_equations():
from sympy import Eq, solve, symbols
hash, series, year, season, episode, release_group = symbols('hash series year season episode release_group')
format, audio_codec, resolution, video_codec = symbols('format audio_codec resolution video_codec')
source, audio_codec, resolution, video_codec = symbols('source audio_codec resolution video_codec')
hearing_impaired = symbols('hearing_impaired')
equations = [
# hash is best
Eq(hash, series + year + season + episode + release_group + format + audio_codec + resolution + video_codec),
Eq(hash, series + year + season + episode + release_group + source + audio_codec + resolution + video_codec),
# series counts for the most part in the total score
Eq(series, year + season + episode + release_group + format + audio_codec + resolution + video_codec + 1),
Eq(series, year + season + episode + release_group + source + audio_codec + resolution + video_codec + 1),
# year is the second most important part
Eq(year, season + episode + release_group + format + audio_codec + resolution + video_codec + 1),
Eq(year, season + episode + release_group + source + audio_codec + resolution + video_codec + 1),
# season is important too
Eq(season, release_group + format + audio_codec + resolution + video_codec + 1),
Eq(season, release_group + source + audio_codec + resolution + video_codec + 1),
# episode is equally important to season
Eq(episode, season),
# release group is the next most wanted match
Eq(release_group, format + audio_codec + resolution + video_codec + 1),
Eq(release_group, source + audio_codec + resolution + video_codec + 1),
# format counts as much as audio_codec, resolution and video_codec
Eq(format, audio_codec + resolution + video_codec),
# source counts as much as audio_codec, resolution and video_codec
Eq(source, audio_codec + resolution + video_codec),
# audio_codec is more valuable than video_codec
Eq(audio_codec, video_codec + 1),
@ -191,7 +191,7 @@ def solve_episode_equations():
Eq(hearing_impaired, 1),
]
return solve(equations, [hash, series, year, season, episode, release_group, format, audio_codec, resolution,
return solve(equations, [hash, series, year, season, episode, release_group, source, audio_codec, resolution,
hearing_impaired, video_codec])
@ -199,24 +199,24 @@ def solve_movie_equations():
from sympy import Eq, solve, symbols
hash, title, year, release_group = symbols('hash title year release_group')
format, audio_codec, resolution, video_codec = symbols('format audio_codec resolution video_codec')
source, audio_codec, resolution, video_codec = symbols('source audio_codec resolution video_codec')
hearing_impaired = symbols('hearing_impaired')
equations = [
# hash is best
Eq(hash, title + year + release_group + format + audio_codec + resolution + video_codec),
Eq(hash, title + year + release_group + source + audio_codec + resolution + video_codec),
# title counts for the most part in the total score
Eq(title, year + release_group + format + audio_codec + resolution + video_codec + 1),
Eq(title, year + release_group + source + audio_codec + resolution + video_codec + 1),
# year is the second most important part
Eq(year, release_group + format + audio_codec + resolution + video_codec + 1),
Eq(year, release_group + source + audio_codec + resolution + video_codec + 1),
# release group is the next most wanted match
Eq(release_group, format + audio_codec + resolution + video_codec + 1),
Eq(release_group, source + audio_codec + resolution + video_codec + 1),
# format counts as much as audio_codec, resolution and video_codec
Eq(format, audio_codec + resolution + video_codec),
# source counts as much as audio_codec, resolution and video_codec
Eq(source, audio_codec + resolution + video_codec),
# audio_codec is more valuable than video_codec
Eq(audio_codec, video_codec + 1),
@ -231,5 +231,5 @@ def solve_movie_equations():
Eq(hearing_impaired, 1),
]
return solve(equations, [hash, title, year, release_group, format, audio_codec, resolution, hearing_impaired,
return solve(equations, [hash, title, year, release_group, source, audio_codec, resolution, hearing_impaired,
video_codec])

View file

@ -10,6 +10,7 @@ import pysrt
from .score import get_equivalent_release_groups
from .video import Episode, Movie
from .utils import sanitize, sanitize_release_group
from six import text_type
logger = logging.getLogger(__name__)
@ -71,10 +72,12 @@ class Subtitle(object):
if not self.content:
return
if self.encoding:
return self.content.decode(self.encoding, errors='replace')
if not isinstance(self.content, text_type):
if self.encoding:
return self.content.decode(self.encoding, errors='replace')
return self.content.decode(self.guess_encoding(), errors='replace')
return self.content.decode(self.guess_encoding(), errors='replace')
return self.content
def is_valid(self):
"""Check if a :attr:`text` is a valid SubRip format.
@ -238,9 +241,9 @@ def guess_matches(video, guess, partial=False):
# resolution
if video.resolution and 'screen_size' in guess and guess['screen_size'] == video.resolution:
matches.add('resolution')
# format
if video.format and 'format' in guess and guess['format'].lower() == video.format.lower():
matches.add('format')
# source
if video.source and 'source' in guess and guess['source'].lower() == video.source.lower():
matches.add('source')
# video_codec
if video.video_codec and 'video_codec' in guess and guess['video_codec'] == video.video_codec:
matches.add('video_codec')

View file

@ -25,7 +25,7 @@ class Video(object):
Represent a video, existing or not.
:param str name: name or path of the video.
:param str format: format of the video (HDTV, WEB-DL, BluRay, ...).
:param str source: source of the video (HDTV, Web, Blu-ray, ...).
:param str release_group: release group of the video.
:param str resolution: resolution of the video stream (480p, 720p, 1080p or 1080i).
:param str video_codec: codec of the video stream.
@ -36,13 +36,13 @@ class Video(object):
:param set subtitle_languages: existing subtitle languages.
"""
def __init__(self, name, format=None, release_group=None, resolution=None, video_codec=None, audio_codec=None,
def __init__(self, name, source=None, release_group=None, resolution=None, video_codec=None, audio_codec=None,
imdb_id=None, hashes=None, size=None, subtitle_languages=None):
#: Name or path of the video
self.name = name
#: Format of the video (HDTV, WEB-DL, BluRay, ...)
self.format = format
#: Source of the video (HDTV, Web, Blu-ray, ...)
self.source = source
#: Release group of the video
self.release_group = release_group
@ -177,7 +177,7 @@ class Episode(Video):
episode = min(episode_guess) if episode_guess and isinstance(episode_guess, list) else episode_guess
return cls(name, guess['title'], guess.get('season', 1), episode, title=guess.get('episode_title'),
year=guess.get('year'), format=guess.get('format'), original_series='year' not in guess,
year=guess.get('year'), source=guess.get('source'), original_series='year' not in guess,
release_group=guess.get('release_group'), resolution=guess.get('screen_size'),
video_codec=guess.get('video_codec'), audio_codec=guess.get('audio_codec'))
@@ -225,7 +225,7 @@ class Movie(Video):
if 'alternative_title' in guess:
alternative_titles.append(u"%s %s" % (guess['title'], guess['alternative_title']))
return cls(name, guess['title'], format=guess.get('format'), release_group=guess.get('release_group'),
return cls(name, guess['title'], source=guess.get('source'), release_group=guess.get('release_group'),
resolution=guess.get('screen_size'), video_codec=guess.get('video_codec'),
audio_codec=guess.get('audio_codec'), year=guess.get('year'), alternative_titles=alternative_titles)

View file

@@ -44,11 +44,11 @@ class Addic7edSubtitle(_Addic7edSubtitle):
if not subliminal.score.episode_scores.get("addic7ed_boost"):
return matches
# if the release group matches, the format is most likely correct, as well
# if the release group matches, the source is most likely correct, as well
if "release_group" in matches:
matches.add("format")
matches.add("source")
if {"series", "season", "episode", "year"}.issubset(matches) and "format" in matches:
if {"series", "season", "episode", "year"}.issubset(matches) and "source" in matches:
matches.add("addic7ed_boost")
logger.info("Boosting Addic7ed subtitle by %s" % subliminal.score.episode_scores.get("addic7ed_boost"))
return matches

View file

@@ -39,7 +39,7 @@ class ArgenteamSubtitle(Subtitle):
self.asked_for_release_group = asked_for_release_group
self.asked_for_episode = asked_for_episode
self.matches = None
self.format = source
self.source = source
self.video_codec = video_codec
self.tvdb_id = tvdb_id
self.imdb_id = "tt" + imdb_id if imdb_id else None
@@ -55,7 +55,7 @@ class ArgenteamSubtitle(Subtitle):
return self._release_info
combine = []
for attr in ("format", "version"):
for attr in ("source", "version"):
value = getattr(self, attr)
if value:
combine.append(value)
@@ -115,22 +115,22 @@ class ArgenteamSubtitle(Subtitle):
if any(r in sanitize_release_group(self.release) for r in get_equivalent_release_groups(rg)):
matches.add('release_group')
# blatantly assume we've got a matching format if the release group matches
# blatantly assume we've got a matching source if the release group matches
# fixme: smart?
#matches.add('format')
#matches.add('source')
# resolution
if video.resolution and self.version and str(video.resolution) in self.version.lower():
matches.add('resolution')
# format
if video.format and self.format:
formats = [video.format]
if video.format == "WEB-DL":
# source
if video.source and self.source:
formats = [video.source]
if video.source == "Web":
formats.append("WEB")
for fmt in formats:
if fmt.lower() in self.format.lower():
matches.add('format')
if fmt.lower() in self.source.lower():
matches.add('source')
break
matches |= guess_matches(video, guessit(self.release_info), partial=True)

View file

@@ -65,25 +65,25 @@ class BSPlayerSubtitle(Subtitle):
if video.resolution and video.resolution.lower() in subtitle_filename:
matches.add('resolution')
# format
# source
formats = []
if video.format:
formats = [video.format.lower()]
if formats[0] == "web-dl":
if video.source:
formats = [video.source.lower()]
if formats[0] == "web":
formats.append("webdl")
formats.append("webrip")
formats.append("web ")
for frmt in formats:
if frmt.lower() in subtitle_filename:
matches.add('format')
matches.add('source')
break
# video_codec
if video.video_codec:
video_codecs = [video.video_codec.lower()]
if video_codecs[0] == "h264":
if video_codecs[0] == "H.264":
formats.append("x264")
elif video_codecs[0] == "h265":
elif video_codecs[0] == "H.265":
formats.append("x265")
for vc in formats:
if vc.lower() in subtitle_filename:

View file

@@ -112,9 +112,9 @@ class HosszupuskaSubtitle(Subtitle):
# resolution
if video.resolution and self.version and video.resolution in self.version.lower():
matches.add('resolution')
# format
if video.format and self.version and video.format.lower() in self.version.lower():
matches.add('format')
# source
if video.source and self.version and video.source.lower() in self.version.lower():
matches.add('source')
# other properties
matches |= guess_matches(video, guessit(self.release_info))

View file

@@ -118,25 +118,25 @@ class LegendasdivxSubtitle(Subtitle):
if video.resolution and video.resolution.lower() in description:
matches.update(['resolution'])
# format
# source
formats = []
if video.format:
formats = [video.format.lower()]
if formats[0] == "web-dl":
if video.source:
formats = [video.source.lower()]
if formats[0] == "web":
formats.append("webdl")
formats.append("webrip")
formats.append("web")
for frmt in formats:
if frmt in description:
matches.update(['format'])
matches.update(['source'])
break
# video_codec
if video.video_codec:
video_codecs = [video.video_codec.lower()]
if video_codecs[0] == "h264":
if video_codecs[0] == "H.264":
video_codecs.append("x264")
elif video_codecs[0] == "h265":
elif video_codecs[0] == "H.265":
video_codecs.append("x265")
for vc in video_codecs:
if vc in description:

View file

@@ -83,7 +83,7 @@ class ProviderSubtitleArchiveMixin(object):
# consider subtitle valid if:
# - episode and season match
# - format matches (if it was matched before)
# - source matches (if it was matched before)
# - release group matches (and we asked for one and it was matched, or it was not matched)
# - not asked for forced and "forced" not in filename
is_episode = subtitle.asked_for_episode
@@ -103,27 +103,27 @@ class ProviderSubtitleArchiveMixin(object):
or (subtitle.is_pack and subtitle.asked_for_episode in episodes)
) and guess.get("season") == subtitle.season):
format_matches = True
wanted_format_but_not_found = False
source_matches = True
wanted_source_but_not_found = False
if "format" in subtitle.matches:
format_matches = False
if "source" in subtitle.matches:
source_matches = False
if isinstance(subtitle.releases, list):
releases = ",".join(subtitle.releases).lower()
else:
releases = subtitle.releases.lower()
if "format" not in guess:
wanted_format_but_not_found = True
if "source" not in guess:
wanted_source_but_not_found = True
else:
formats = guess["format"]
formats = guess["source"]
if not isinstance(formats, list):
formats = [formats]
for f in formats:
format_matches = f.lower() in releases
if format_matches:
source_matches = f.lower() in releases
if source_matches:
break
release_group_matches = True
@@ -139,11 +139,11 @@ class ProviderSubtitleArchiveMixin(object):
if asked_for_rlsgrp in sub_name_lower:
release_group_matches = True
if release_group_matches and format_matches:
if release_group_matches and source_matches:
matching_sub = sub_name
break
elif release_group_matches and wanted_format_but_not_found:
elif release_group_matches and wanted_source_but_not_found:
subs_unsure.append(sub_name)
else:
subs_fallback.append(sub_name)

Some files were not shown because too many files have changed in this diff. Show more