bazarr/libs/textdistance/algorithms/simple.py
JayZed eb296e13c1
Improved global search function
* Use Hamming textdistance library

Used Hamming textdistance to sort by closest match.

* Global search UI improvements

Increased the dropdown height to show more results initially; further results can be scrolled into view.
Scrollbars will appear automatically as needed.
Remove dropdown when Search box is cleared.

* Added textdistance 4.6.2 library
2024-06-08 06:14:39 -04:00

127 lines
3.1 KiB
Python

from __future__ import annotations
# built-in
from itertools import takewhile
from typing import Sequence
# app
from .base import Base as _Base, BaseSimilarity as _BaseSimilarity
from .types import SimFunc
# Public API of this module: the algorithm classes, followed by the
# lowercase module-level instances of those classes created at the end of
# the file.
__all__ = [
'Prefix', 'Postfix', 'Length', 'Identity', 'Matrix',
'prefix', 'postfix', 'length', 'identity', 'matrix',
]
class Prefix(_BaseSimilarity):
    """Prefix similarity.

    Calling an instance returns the common leading part of the given
    sequences; ``similarity`` returns the length of that part.
    """

    def __init__(self, qval: int = 1, sim_test: SimFunc = None) -> None:
        """Store the comparison settings.

        Args:
            qval: stored on the instance; presumably consumed by the
                inherited ``_get_sequences`` (defined in the base class,
                not in this module).
            sim_test: callable that receives one element from every
                sequence and returns a truthy value while they match.
                Falls back to the inherited ``_ident`` when omitted.
        """
        self.qval = qval
        self.sim_test = sim_test or self._ident

    def __call__(self, *sequences: Sequence) -> Sequence:
        """Return the common prefix of ``sequences``.

        The return type follows the first prepared sequence: ``str`` for
        text, ``bytes`` for bytes, otherwise a ``list`` of elements.
        An empty string is returned when no sequences are passed.
        """
        if not sequences:
            return ''
        sequences = self._get_sequences(*sequences)

        def test(seq):
            # seq holds the elements found at the same position of every sequence
            return self.sim_test(*seq)

        # walk the sequences in lockstep and stop at the first mismatch
        result = [c[0] for c in takewhile(test, zip(*sequences))]

        s = sequences[0]
        if isinstance(s, str):
            return ''.join(result)
        if isinstance(s, bytes):
            # Iterating over bytes yields ints, which b''.join() rejects
            # with TypeError, so build the result from the ints directly.
            if all(isinstance(item, int) for item in result):
                return bytes(result)
            return b''.join(result)
        return result

    def similarity(self, *sequences: Sequence) -> int:
        """Return the length of the common prefix of ``sequences``."""
        return len(self(*sequences))
class Postfix(Prefix):
    """Postfix similarity.

    Calling an instance returns the common trailing part of the given
    sequences. It is computed as the common prefix of the reversed
    sequences, reversed back again.
    """

    def __call__(self, *sequences: Sequence) -> Sequence:
        """Return the common postfix of ``sequences``.

        The return type follows the first sequence: ``str`` for text,
        ``bytes`` for bytes, otherwise a ``list`` of elements.
        An empty string is returned when no sequences are passed, matching
        ``Prefix.__call__`` instead of failing with ``IndexError``.
        """
        if not sequences:
            return ''
        s = sequences[0]
        # reversed() needs real sequences, so materialize each one as a list
        flipped = [list(reversed(seq)) for seq in sequences]
        result = list(reversed(super().__call__(*flipped)))
        if isinstance(s, str):
            return ''.join(result)
        if isinstance(s, bytes):
            # Elements taken from bytes are ints, which b''.join() rejects
            # with TypeError, so build the result from the ints directly.
            if all(isinstance(item, int) for item in result):
                return bytes(result)
            return b''.join(result)
        return result
class Length(_Base):
    """Length distance.

    The distance is the difference between the longest and the shortest
    of the given sequences.
    """

    def __call__(self, *sequences: Sequence) -> int:
        """Return ``max(len) - min(len)`` over ``sequences``.

        Returns 0 when no sequences are passed, because there is no length
        difference to measure (``max()``/``min()`` would otherwise raise
        ``ValueError`` on the empty argument list).
        """
        if not sequences:
            return 0
        lengths = [len(seq) for seq in sequences]
        return max(lengths) - min(lengths)
class Identity(_BaseSimilarity):
    """Identity similarity.

    The result is the outcome of the inherited ``_ident`` check on the
    sequences, converted to an integer.
    """

    def maximum(self, *sequences: Sequence) -> int:
        """Return the highest possible similarity, which is always 1."""
        return 1

    def __call__(self, *sequences: Sequence) -> int:
        """Return ``int(self._ident(*sequences))``.

        ``_ident`` is defined in the base class; it presumably reports
        whether all sequences are equal.
        """
        verdict = self._ident(*sequences)
        return int(verdict)
class Matrix(_BaseSimilarity):
    """Matrix similarity.

    Similarity values are looked up in a user-supplied table keyed by the
    tuple of compared sequences, with fixed costs as a fallback.
    """

    def __init__(
        self,
        mat=None,
        mismatch_cost: int = 0,
        match_cost: int = 1,
        symmetric: bool = True,
        external: bool = True,
    ) -> None:
        """Store the lookup table and the fallback costs.

        Args:
            mat: container supporting ``in`` and indexing by the tuple of
                call arguments (for example a dict); a falsy value means
                no table is used.
            mismatch_cost: value returned when nothing is found and the
                sequences are not identical.
            match_cost: value returned when nothing is found and the
                sequences are identical; also the reported maximum.
            symmetric: when true, a miss is retried with the arguments in
                reversed order.
            external: accepted but not stored by this constructor.
        """
        self.symmetric = symmetric
        self.match_cost = match_cost
        self.mismatch_cost = mismatch_cost
        self.mat = mat

    def maximum(self, *sequences: Sequence) -> int:
        """Return the highest possible similarity, i.e. ``match_cost``."""
        return self.match_cost

    def __call__(self, *sequences: Sequence) -> int:
        """Return the similarity of ``sequences``.

        Lookup order: the table entry for the arguments as given, then
        (if ``symmetric``) the entry for the reversed arguments, then
        ``match_cost`` or ``mismatch_cost`` depending on ``_ident``.
        """
        if self.mat:
            # direct hit in the table
            if sequences in self.mat:
                return self.mat[sequences]
            # retry with the arguments mirrored
            if self.symmetric:
                sequences = sequences[::-1]
                if sequences in self.mat:
                    return self.mat[sequences]
        # no table, or no entry found: fall back to the fixed costs
        return self.match_cost if self._ident(*sequences) else self.mismatch_cost
# Module-level instances of each algorithm, created with the constructors'
# default arguments; these are the lowercase names exported in __all__.
prefix = Prefix()
postfix = Postfix()
length = Length()
identity = Identity()
matrix = Matrix()