mirror of
https://github.com/morpheus65535/bazarr.git
synced 2025-04-24 06:37:16 -04:00
Improved uppercase detection / execution in Fix Uppercase mod
This commit is contained in:
parent
cefd1638dd
commit
2a330f63dc
1 changed file with 30 additions and 28 deletions
|
@ -22,7 +22,7 @@ class SubtitleModifications(object):
|
|||
language = None
|
||||
initialized_mods = {}
|
||||
mods_used = []
|
||||
only_uppercase = False
|
||||
mostly_uppercase = False
|
||||
f = None
|
||||
|
||||
font_style_tag_start = u"{\\"
|
||||
|
@ -118,7 +118,7 @@ class SubtitleModifications(object):
|
|||
identifier, self.language)
|
||||
continue
|
||||
|
||||
if mod_cls.only_uppercase and not self.only_uppercase:
|
||||
if mod_cls.only_uppercase and not self.mostly_uppercase:
|
||||
if self.debug:
|
||||
logger.debug("Skipping %s, because the subtitle isn't all uppercase", identifier)
|
||||
continue
|
||||
|
@ -188,41 +188,43 @@ class SubtitleModifications(object):
|
|||
return line_mods, non_line_mods, used_mods
|
||||
|
||||
def detect_uppercase(self):
|
||||
entries_used = 0
|
||||
for entry in self.f:
|
||||
entry_used = False
|
||||
sub = entry.text
|
||||
# skip HI bracket entries, those might actually be lowercase
|
||||
sub = sub.strip()
|
||||
for processor in registry.mods["remove_HI"].processors[:4]:
|
||||
sub = processor.process(sub)
|
||||
MAXIMUM_ENTRIES = 50
|
||||
MINIMUM_UPPERCASE_PERCENTAGE = 90
|
||||
MINIMUM_UPPERCASE_COUNT = 100
|
||||
entry_count = 0
|
||||
uppercase_count = 0
|
||||
lowercase_count = 0
|
||||
|
||||
if sub.strip():
|
||||
# only consider alphabetic characters to determine if uppercase
|
||||
alpha_sub = ''.join([i for i in sub if i.isalpha()])
|
||||
if alpha_sub and not alpha_sub.isupper():
|
||||
return False
|
||||
for entry in self.f:
|
||||
sub = entry.text
|
||||
# skip HI bracket entries, those might actually be lowercase
|
||||
sub = sub.strip()
|
||||
for processor in registry.mods["remove_HI"].processors[:4]:
|
||||
sub = processor.process(sub)
|
||||
|
||||
entry_used = True
|
||||
else:
|
||||
# skip full entry
|
||||
continue
|
||||
if sub.strip():
|
||||
uppercase_count += sum(1 for char in sub if char.isupper())
|
||||
lowercase_count += sum(1 for char in sub if char.islower())
|
||||
entry_count += 1
|
||||
|
||||
if entry_used:
|
||||
entries_used += 1
|
||||
if entry_count >= MAXIMUM_ENTRIES:
|
||||
break
|
||||
|
||||
if entries_used == 40:
|
||||
break
|
||||
|
||||
return True
|
||||
total_character_count = lowercase_count + uppercase_count
|
||||
if total_character_count > 0 and uppercase_count > MINIMUM_UPPERCASE_COUNT:
|
||||
uppercase_percentage = uppercase_count * 100 / total_character_count
|
||||
logger.debug(f"Uppercase mod percentage is {uppercase_percentage:.2f}% vs minimum of {MINIMUM_UPPERCASE_PERCENTAGE}%")
|
||||
return uppercase_percentage >= MINIMUM_UPPERCASE_PERCENTAGE
|
||||
|
||||
return False
|
||||
|
||||
def modify(self, *mods):
|
||||
new_entries = []
|
||||
start = time.time()
|
||||
self.only_uppercase = self.detect_uppercase()
|
||||
self.mostly_uppercase = self.detect_uppercase()
|
||||
|
||||
if self.only_uppercase and self.debug:
|
||||
logger.debug("Full-uppercase subtitle found")
|
||||
if self.mostly_uppercase and self.debug:
|
||||
logger.debug("Mostly-uppercase subtitle found")
|
||||
|
||||
line_mods, non_line_mods, mods_used = self.prepare_mods(*mods)
|
||||
self.mods_used = mods_used
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue