mirror of
https://github.com/morpheus65535/bazarr.git
synced 2025-06-29 01:43:10 -04:00
* Use Hamming textdistance library Used Hamming textdistance to sort by closest match. * Global search UI improvements Increased dropdown height to show more results initially (and which can also be scrolled into view). Scrollbars will appear automatically as needed. Remove dropdown when Search box is cleared. * Added textdistance 4.6.2 library
191 lines
6.2 KiB
Python
191 lines
6.2 KiB
Python
from __future__ import annotations
|
|
|
|
# built-in
|
|
from collections import Counter
|
|
from contextlib import suppress
|
|
from typing import Sequence, TypeVar
|
|
|
|
# app
|
|
from ..libraries import prototype
|
|
from ..utils import find_ngrams
|
|
|
|
|
|
libraries = prototype.clone()
|
|
libraries.optimize()
|
|
T = TypeVar('T')
|
|
|
|
|
|
class Base:
|
|
def __init__(self, qval: int = 1, external: bool = True) -> None:
|
|
self.qval = qval
|
|
self.external = external
|
|
|
|
def __call__(self, *sequences: Sequence[object]) -> float:
|
|
raise NotImplementedError
|
|
|
|
@staticmethod
|
|
def maximum(*sequences: Sequence[object]) -> float:
|
|
"""Get maximum possible value
|
|
"""
|
|
return max(map(len, sequences))
|
|
|
|
def distance(self, *sequences: Sequence[object]) -> float:
|
|
"""Get distance between sequences
|
|
"""
|
|
return self(*sequences)
|
|
|
|
def similarity(self, *sequences: Sequence[object]) -> float:
|
|
"""Get sequences similarity.
|
|
|
|
similarity = maximum - distance
|
|
"""
|
|
return self.maximum(*sequences) - self.distance(*sequences)
|
|
|
|
def normalized_distance(self, *sequences: Sequence[object]) -> float:
|
|
"""Get distance from 0 to 1
|
|
"""
|
|
maximum = self.maximum(*sequences)
|
|
if maximum == 0:
|
|
return 0
|
|
return self.distance(*sequences) / maximum
|
|
|
|
def normalized_similarity(self, *sequences: Sequence[object]) -> float:
|
|
"""Get similarity from 0 to 1
|
|
|
|
normalized_similarity = 1 - normalized_distance
|
|
"""
|
|
return 1 - self.normalized_distance(*sequences)
|
|
|
|
def external_answer(self, *sequences: Sequence[object]) -> float | None:
|
|
"""Try to get answer from known external libraries.
|
|
"""
|
|
# if this feature disabled
|
|
if not getattr(self, 'external', False):
|
|
return None
|
|
# all external libs don't support test_func
|
|
test_func = getattr(self, 'test_func', self._ident)
|
|
if test_func is not self._ident:
|
|
return None
|
|
# try to get external libs for algorithm
|
|
libs = libraries.get_libs(self.__class__.__name__)
|
|
for lib in libs:
|
|
# if conditions not satisfied
|
|
if not lib.check_conditions(self, *sequences):
|
|
continue
|
|
# if library is not installed yet
|
|
func = lib.get_function()
|
|
if func is None:
|
|
continue
|
|
prepared_sequences = lib.prepare(*sequences)
|
|
# fail side libraries silently and try next libs
|
|
with suppress(Exception):
|
|
return func(*prepared_sequences)
|
|
return None
|
|
|
|
def quick_answer(self, *sequences: Sequence[object]) -> float | None:
|
|
"""Try to get answer quick without main implementation calling.
|
|
|
|
If no sequences, 1 sequence or all sequences are equal then return 0.
|
|
If any sequence are empty then return maximum.
|
|
And in finish try to get external answer.
|
|
"""
|
|
if not sequences:
|
|
return 0
|
|
if len(sequences) == 1:
|
|
return 0
|
|
if self._ident(*sequences):
|
|
return 0
|
|
if not all(sequences):
|
|
return self.maximum(*sequences)
|
|
# try get answer from external libs
|
|
return self.external_answer(*sequences)
|
|
|
|
@staticmethod
|
|
def _ident(*elements: object) -> bool:
|
|
"""Return True if all sequences are equal.
|
|
"""
|
|
try:
|
|
# for hashable elements
|
|
return len(set(elements)) == 1
|
|
except TypeError:
|
|
# for unhashable elements
|
|
for e1, e2 in zip(elements, elements[1:]):
|
|
if e1 != e2:
|
|
return False
|
|
return True
|
|
|
|
def _get_sequences(self, *sequences: Sequence[object]) -> list:
|
|
"""Prepare sequences.
|
|
|
|
qval=None: split text by words
|
|
qval=1: do not split sequences. For text this is mean comparing by letters.
|
|
qval>1: split sequences by q-grams
|
|
"""
|
|
# by words
|
|
if not self.qval:
|
|
return [s.split() for s in sequences] # type: ignore[attr-defined]
|
|
# by chars
|
|
if self.qval == 1:
|
|
return list(sequences)
|
|
# by n-grams
|
|
return [find_ngrams(s, self.qval) for s in sequences]
|
|
|
|
def _get_counters(self, *sequences: Sequence[object]) -> list[Counter]:
|
|
"""Prepare sequences and convert it to Counters.
|
|
"""
|
|
# already Counters
|
|
if all(isinstance(s, Counter) for s in sequences):
|
|
return list(sequences) # type: ignore[arg-type]
|
|
return [Counter(s) for s in self._get_sequences(*sequences)]
|
|
|
|
def _intersect_counters(self, *sequences: Counter[T]) -> Counter[T]:
|
|
intersection = sequences[0].copy()
|
|
for s in sequences[1:]:
|
|
intersection &= s
|
|
return intersection
|
|
|
|
def _union_counters(self, *sequences: Counter[T]) -> Counter[T]:
|
|
union = sequences[0].copy()
|
|
for s in sequences[1:]:
|
|
union |= s
|
|
return union
|
|
|
|
def _sum_counters(self, *sequences: Counter[T]) -> Counter[T]:
|
|
result = sequences[0].copy()
|
|
for s in sequences[1:]:
|
|
result += s
|
|
return result
|
|
|
|
def _count_counters(self, counter: Counter) -> int:
|
|
"""Return all elements count from Counter
|
|
"""
|
|
if getattr(self, 'as_set', False):
|
|
return len(set(counter))
|
|
else:
|
|
return sum(counter.values())
|
|
|
|
def __repr__(self) -> str:
|
|
return '{name}({data})'.format(
|
|
name=type(self).__name__,
|
|
data=self.__dict__,
|
|
)
|
|
|
|
|
|
class BaseSimilarity(Base):
|
|
def distance(self, *sequences: Sequence[object]) -> float:
|
|
return self.maximum(*sequences) - self.similarity(*sequences)
|
|
|
|
def similarity(self, *sequences: Sequence[object]) -> float:
|
|
return self(*sequences)
|
|
|
|
def quick_answer(self, *sequences: Sequence[object]) -> float | None:
|
|
if not sequences:
|
|
return self.maximum(*sequences)
|
|
if len(sequences) == 1:
|
|
return self.maximum(*sequences)
|
|
if self._ident(*sequences):
|
|
return self.maximum(*sequences)
|
|
if not all(sequences):
|
|
return 0
|
|
# try get answer from external libs
|
|
return self.external_answer(*sequences)
|