mirror of
https://github.com/morpheus65535/bazarr.git
synced 2025-04-23 22:27:17 -04:00
Downgrade auditok to version 0.1.5
ffsubsync pinned auditok to 0.1.5. We missed this when upgrading ffsubsync and auditok. Since we dont run pip to install the libraries, there is no version checks
This commit is contained in:
parent
08e50a8348
commit
30ef713fa2
13 changed files with 2219 additions and 4566 deletions
|
@ -2,16 +2,20 @@
|
|||
:author:
|
||||
|
||||
Amine SEHILI <amine.sehili@gmail.com>
|
||||
2015-2021
|
||||
2015-2016
|
||||
|
||||
:License:
|
||||
|
||||
This package is published under the MIT license.
|
||||
This package is published under GNU GPL Version 3.
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from .core import *
|
||||
from .io import *
|
||||
from .util import *
|
||||
from . import dataset
|
||||
from .exceptions import *
|
||||
|
||||
__version__ = "0.2.0"
|
||||
__version__ = "0.1.5"
|
||||
|
||||
|
||||
|
|
1155
libs/auditok/cmdline.py
Normal file → Executable file
1155
libs/auditok/cmdline.py
Normal file → Executable file
File diff suppressed because it is too large
Load diff
|
@ -1,126 +0,0 @@
|
|||
import sys
|
||||
import logging
|
||||
from collections import namedtuple
|
||||
from . import workers
|
||||
from .util import AudioDataSource
|
||||
from .io import player_for
|
||||
|
||||
_AUDITOK_LOGGER = "AUDITOK_LOGGER"
|
||||
KeywordArguments = namedtuple(
|
||||
"KeywordArguments", ["io", "split", "miscellaneous"]
|
||||
)
|
||||
|
||||
|
||||
def make_kwargs(args_ns):
|
||||
if args_ns.save_stream is None:
|
||||
record = args_ns.plot or (args_ns.save_image is not None)
|
||||
else:
|
||||
record = False
|
||||
try:
|
||||
use_channel = int(args_ns.use_channel)
|
||||
except (ValueError, TypeError):
|
||||
use_channel = args_ns.use_channel
|
||||
|
||||
io_kwargs = {
|
||||
"input": args_ns.input,
|
||||
"audio_format": args_ns.input_format,
|
||||
"max_read": args_ns.max_read,
|
||||
"block_dur": args_ns.analysis_window,
|
||||
"sampling_rate": args_ns.sampling_rate,
|
||||
"sample_width": args_ns.sample_width,
|
||||
"channels": args_ns.channels,
|
||||
"use_channel": use_channel,
|
||||
"save_stream": args_ns.save_stream,
|
||||
"save_detections_as": args_ns.save_detections_as,
|
||||
"export_format": args_ns.output_format,
|
||||
"large_file": args_ns.large_file,
|
||||
"frames_per_buffer": args_ns.frame_per_buffer,
|
||||
"input_device_index": args_ns.input_device_index,
|
||||
"record": record,
|
||||
}
|
||||
|
||||
split_kwargs = {
|
||||
"min_dur": args_ns.min_duration,
|
||||
"max_dur": args_ns.max_duration,
|
||||
"max_silence": args_ns.max_silence,
|
||||
"drop_trailing_silence": args_ns.drop_trailing_silence,
|
||||
"strict_min_dur": args_ns.strict_min_duration,
|
||||
"energy_threshold": args_ns.energy_threshold,
|
||||
}
|
||||
|
||||
miscellaneous = {
|
||||
"echo": args_ns.echo,
|
||||
"progress_bar": args_ns.progress_bar,
|
||||
"command": args_ns.command,
|
||||
"quiet": args_ns.quiet,
|
||||
"printf": args_ns.printf,
|
||||
"time_format": args_ns.time_format,
|
||||
"timestamp_format": args_ns.timestamp_format,
|
||||
}
|
||||
return KeywordArguments(io_kwargs, split_kwargs, miscellaneous)
|
||||
|
||||
|
||||
def make_logger(stderr=False, file=None, name=_AUDITOK_LOGGER):
|
||||
if not stderr and file is None:
|
||||
return None
|
||||
logger = logging.getLogger(name)
|
||||
logger.setLevel(logging.INFO)
|
||||
if stderr:
|
||||
handler = logging.StreamHandler(sys.stderr)
|
||||
handler.setLevel(logging.INFO)
|
||||
logger.addHandler(handler)
|
||||
|
||||
if file is not None:
|
||||
handler = logging.FileHandler(file, "w")
|
||||
fmt = logging.Formatter("[%(asctime)s] | %(message)s")
|
||||
handler.setFormatter(fmt)
|
||||
handler.setLevel(logging.INFO)
|
||||
logger.addHandler(handler)
|
||||
return logger
|
||||
|
||||
|
||||
def initialize_workers(logger=None, **kwargs):
|
||||
observers = []
|
||||
reader = AudioDataSource(source=kwargs["input"], **kwargs)
|
||||
if kwargs["save_stream"] is not None:
|
||||
reader = workers.StreamSaverWorker(
|
||||
reader,
|
||||
filename=kwargs["save_stream"],
|
||||
export_format=kwargs["export_format"],
|
||||
)
|
||||
reader.start()
|
||||
|
||||
if kwargs["save_detections_as"] is not None:
|
||||
worker = workers.RegionSaverWorker(
|
||||
kwargs["save_detections_as"],
|
||||
kwargs["export_format"],
|
||||
logger=logger,
|
||||
)
|
||||
observers.append(worker)
|
||||
|
||||
if kwargs["echo"]:
|
||||
player = player_for(reader)
|
||||
worker = workers.PlayerWorker(
|
||||
player, progress_bar=kwargs["progress_bar"], logger=logger
|
||||
)
|
||||
observers.append(worker)
|
||||
|
||||
if kwargs["command"] is not None:
|
||||
worker = workers.CommandLineWorker(
|
||||
command=kwargs["command"], logger=logger
|
||||
)
|
||||
observers.append(worker)
|
||||
|
||||
if not kwargs["quiet"]:
|
||||
print_format = (
|
||||
kwargs["printf"]
|
||||
.replace("\\n", "\n")
|
||||
.replace("\\t", "\t")
|
||||
.replace("\\r", "\r")
|
||||
)
|
||||
worker = workers.PrintWorker(
|
||||
print_format, kwargs["time_format"], kwargs["timestamp_format"]
|
||||
)
|
||||
observers.append(worker)
|
||||
|
||||
return reader, observers
|
1652
libs/auditok/core.py
1652
libs/auditok/core.py
File diff suppressed because it is too large
Load diff
|
@ -1,31 +1,19 @@
|
|||
"""
|
||||
This module contains links to audio files that can be used for test purposes.
|
||||
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
one_to_six_arabic_16000_mono_bc_noise
|
||||
was_der_mensch_saet_mono_44100_lead_trail_silence
|
||||
This module contains links to audio files you can use for test purposes.
|
||||
"""
|
||||
|
||||
import os
|
||||
|
||||
__all__ = [
|
||||
"one_to_six_arabic_16000_mono_bc_noise",
|
||||
"was_der_mensch_saet_mono_44100_lead_trail_silence",
|
||||
]
|
||||
__all__ = ["one_to_six_arabic_16000_mono_bc_noise", "was_der_mensch_saet_mono_44100_lead_trail_silence"]
|
||||
|
||||
_current_dir = os.path.dirname(os.path.realpath(__file__))
|
||||
|
||||
one_to_six_arabic_16000_mono_bc_noise = "{cd}{sep}data{sep}1to6arabic_\
|
||||
16000_mono_bc_noise.wav".format(
|
||||
cd=_current_dir, sep=os.path.sep
|
||||
)
|
||||
16000_mono_bc_noise.wav".format(cd=_current_dir, sep=os.path.sep)
|
||||
"""A wave file that contains a pronunciation of Arabic numbers from 1 to 6"""
|
||||
|
||||
|
||||
was_der_mensch_saet_mono_44100_lead_trail_silence = "{cd}{sep}data{sep}was_\
|
||||
der_mensch_saet_das_wird_er_vielfach_ernten_44100Hz_mono_lead_trail_\
|
||||
silence.wav".format(
|
||||
cd=_current_dir, sep=os.path.sep
|
||||
)
|
||||
"""A wave file that contains a sentence with a long leading and trailing silence"""
|
||||
silence.wav".format(cd=_current_dir, sep=os.path.sep)
|
||||
""" A wave file that contains a sentence between long leading and trailing periods of silence"""
|
|
@ -1,41 +1,9 @@
|
|||
"""
|
||||
November 2015
|
||||
@author: Amine SEHILI <amine.sehili@gmail.com>
|
||||
"""
|
||||
|
||||
class DuplicateArgument(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class TooSamllBlockDuration(ValueError):
|
||||
"""Raised when block_dur results in a block_size smaller than one sample."""
|
||||
|
||||
def __init__(self, message, block_dur, sampling_rate):
|
||||
self.block_dur = block_dur
|
||||
self.sampling_rate = sampling_rate
|
||||
super(TooSamllBlockDuration, self).__init__(message)
|
||||
|
||||
|
||||
class TimeFormatError(Exception):
|
||||
"""Raised when a duration formatting directive is unknown."""
|
||||
|
||||
|
||||
class EndOfProcessing(Exception):
|
||||
"""Raised within command line script's main function to jump to
|
||||
postprocessing code."""
|
||||
|
||||
|
||||
class AudioIOError(Exception):
|
||||
"""Raised when a compressed audio file cannot be loaded or when trying
|
||||
to read from a not yet open AudioSource"""
|
||||
|
||||
|
||||
class AudioParameterError(AudioIOError):
|
||||
"""Raised when one audio parameter is missing when loading raw data or
|
||||
saving data to a format other than raw. Also raised when an audio
|
||||
parameter has a wrong value."""
|
||||
|
||||
|
||||
class AudioEncodingError(Exception):
|
||||
"""Raised if audio data can not be encoded in the provided format"""
|
||||
|
||||
|
||||
class AudioEncodingWarning(RuntimeWarning):
|
||||
"""Raised if audio data can not be encoded in the provided format
|
||||
but saved as wav.
|
||||
"""
|
||||
|
|
1252
libs/auditok/io.py
1252
libs/auditok/io.py
File diff suppressed because it is too large
Load diff
|
@ -1,150 +0,0 @@
|
|||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
|
||||
AUDITOK_PLOT_THEME = {
|
||||
"figure": {"facecolor": "#482a36", "alpha": 0.2},
|
||||
"plot": {"facecolor": "#282a36"},
|
||||
"energy_threshold": {
|
||||
"color": "#e31f8f",
|
||||
"linestyle": "--",
|
||||
"linewidth": 1,
|
||||
},
|
||||
"signal": {"color": "#40d970", "linestyle": "-", "linewidth": 1},
|
||||
"detections": {
|
||||
"facecolor": "#777777",
|
||||
"edgecolor": "#ff8c1a",
|
||||
"linewidth": 1,
|
||||
"alpha": 0.75,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def _make_time_axis(nb_samples, sampling_rate):
|
||||
sample_duration = 1 / sampling_rate
|
||||
x = np.linspace(0, sample_duration * (nb_samples - 1), nb_samples)
|
||||
return x
|
||||
|
||||
|
||||
def _plot_line(x, y, theme, xlabel=None, ylabel=None, **kwargs):
|
||||
color = theme.get("color", theme.get("c"))
|
||||
ls = theme.get("linestyle", theme.get("ls"))
|
||||
lw = theme.get("linewidth", theme.get("lw"))
|
||||
plt.plot(x, y, c=color, ls=ls, lw=lw, **kwargs)
|
||||
plt.xlabel(xlabel, fontsize=8)
|
||||
plt.ylabel(ylabel, fontsize=8)
|
||||
|
||||
|
||||
def _plot_detections(subplot, detections, theme):
|
||||
fc = theme.get("facecolor", theme.get("fc"))
|
||||
ec = theme.get("edgecolor", theme.get("ec"))
|
||||
ls = theme.get("linestyle", theme.get("ls"))
|
||||
lw = theme.get("linewidth", theme.get("lw"))
|
||||
alpha = theme.get("alpha")
|
||||
for (start, end) in detections:
|
||||
subplot.axvspan(start, end, fc=fc, ec=ec, ls=ls, lw=lw, alpha=alpha)
|
||||
|
||||
|
||||
def plot(
|
||||
audio_region,
|
||||
scale_signal=True,
|
||||
detections=None,
|
||||
energy_threshold=None,
|
||||
show=True,
|
||||
figsize=None,
|
||||
save_as=None,
|
||||
dpi=120,
|
||||
theme="auditok",
|
||||
):
|
||||
y = np.asarray(audio_region)
|
||||
if len(y.shape) == 1:
|
||||
y = y.reshape(1, -1)
|
||||
nb_subplots, nb_samples = y.shape
|
||||
sampling_rate = audio_region.sampling_rate
|
||||
time_axis = _make_time_axis(nb_samples, sampling_rate)
|
||||
if energy_threshold is not None:
|
||||
eth_log10 = energy_threshold * np.log(10) / 10
|
||||
amplitude_threshold = np.sqrt(np.exp(eth_log10))
|
||||
else:
|
||||
amplitude_threshold = None
|
||||
if detections is None:
|
||||
detections = []
|
||||
else:
|
||||
# End of detection corresponds to the end of the last sample but
|
||||
# to stay compatible with the time axis of signal plotting we want end
|
||||
# of detection to correspond to the *start* of the that last sample.
|
||||
detections = [
|
||||
(start, end - (1 / sampling_rate)) for (start, end) in detections
|
||||
]
|
||||
if theme == "auditok":
|
||||
theme = AUDITOK_PLOT_THEME
|
||||
|
||||
fig = plt.figure(figsize=figsize, dpi=dpi)
|
||||
fig_theme = theme.get("figure", theme.get("fig", {}))
|
||||
fig_fc = fig_theme.get("facecolor", fig_theme.get("ffc"))
|
||||
fig_alpha = fig_theme.get("alpha", 1)
|
||||
fig.patch.set_facecolor(fig_fc)
|
||||
fig.patch.set_alpha(fig_alpha)
|
||||
|
||||
plot_theme = theme.get("plot", {})
|
||||
plot_fc = plot_theme.get("facecolor", plot_theme.get("pfc"))
|
||||
|
||||
if nb_subplots > 2 and nb_subplots % 2 == 0:
|
||||
nb_rows = nb_subplots // 2
|
||||
nb_columns = 2
|
||||
else:
|
||||
nb_rows = nb_subplots
|
||||
nb_columns = 1
|
||||
|
||||
for sid, samples in enumerate(y, 1):
|
||||
ax = fig.add_subplot(nb_rows, nb_columns, sid)
|
||||
ax.set_facecolor(plot_fc)
|
||||
if scale_signal:
|
||||
std = samples.std()
|
||||
if std > 0:
|
||||
mean = samples.mean()
|
||||
std = samples.std()
|
||||
samples = (samples - mean) / std
|
||||
max_ = samples.max()
|
||||
plt.ylim(-1.5 * max_, 1.5 * max_)
|
||||
if amplitude_threshold is not None:
|
||||
if scale_signal and std > 0:
|
||||
amp_th = (amplitude_threshold - mean) / std
|
||||
else:
|
||||
amp_th = amplitude_threshold
|
||||
eth_theme = theme.get("energy_threshold", theme.get("eth", {}))
|
||||
_plot_line(
|
||||
[time_axis[0], time_axis[-1]],
|
||||
[amp_th] * 2,
|
||||
eth_theme,
|
||||
label="Detection threshold",
|
||||
)
|
||||
if sid == 1:
|
||||
legend = plt.legend(
|
||||
["Detection threshold"],
|
||||
facecolor=fig_fc,
|
||||
framealpha=0.1,
|
||||
bbox_to_anchor=(0.0, 1.15, 1.0, 0.102),
|
||||
loc=2,
|
||||
)
|
||||
legend = plt.gca().add_artist(legend)
|
||||
|
||||
signal_theme = theme.get("signal", {})
|
||||
_plot_line(
|
||||
time_axis,
|
||||
samples,
|
||||
signal_theme,
|
||||
xlabel="Time (seconds)",
|
||||
ylabel="Signal{}".format(" (scaled)" if scale_signal else ""),
|
||||
)
|
||||
detections_theme = theme.get("detections", {})
|
||||
_plot_detections(ax, detections, detections_theme)
|
||||
plt.title("Channel {}".format(sid), fontsize=10)
|
||||
|
||||
plt.xticks(fontsize=8)
|
||||
plt.yticks(fontsize=8)
|
||||
plt.tight_layout()
|
||||
|
||||
if save_as is not None:
|
||||
plt.savefig(save_as, dpi=dpi)
|
||||
if show:
|
||||
plt.show()
|
|
@ -1,179 +0,0 @@
|
|||
"""
|
||||
Module for basic audio signal processing and array operations.
|
||||
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
to_array
|
||||
extract_single_channel
|
||||
compute_average_channel
|
||||
compute_average_channel_stereo
|
||||
separate_channels
|
||||
calculate_energy_single_channel
|
||||
calculate_energy_multichannel
|
||||
"""
|
||||
from array import array as array_
|
||||
import audioop
|
||||
import math
|
||||
|
||||
FORMAT = {1: "b", 2: "h", 4: "i"}
|
||||
_EPSILON = 1e-10
|
||||
|
||||
|
||||
def to_array(data, sample_width, channels):
|
||||
"""Extract individual channels of audio data and return a list of arrays of
|
||||
numeric samples. This will always return a list of `array.array` objects
|
||||
(one per channel) even if audio data is mono.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
data : bytes
|
||||
raw audio data.
|
||||
sample_width : int
|
||||
size in bytes of one audio sample (one channel considered).
|
||||
|
||||
Returns
|
||||
-------
|
||||
samples_arrays : list
|
||||
list of arrays of audio samples.
|
||||
"""
|
||||
fmt = FORMAT[sample_width]
|
||||
if channels == 1:
|
||||
return [array_(fmt, data)]
|
||||
return separate_channels(data, fmt, channels)
|
||||
|
||||
|
||||
def extract_single_channel(data, fmt, channels, selected):
|
||||
samples = array_(fmt, data)
|
||||
return samples[selected::channels]
|
||||
|
||||
|
||||
def compute_average_channel(data, fmt, channels):
|
||||
"""
|
||||
Compute and return average channel of multi-channel audio data. If the
|
||||
number of channels is 2, use :func:`compute_average_channel_stereo` (much
|
||||
faster). This function uses satandard `array` module to convert `bytes` data
|
||||
into an array of numeric values.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
data : bytes
|
||||
multi-channel audio data to mix down.
|
||||
fmt : str
|
||||
format (single character) to pass to `array.array` to convert `data`
|
||||
into an array of samples. This should be "b" if audio data's sample width
|
||||
is 1, "h" if it's 2 and "i" if it's 4.
|
||||
channels : int
|
||||
number of channels of audio data.
|
||||
|
||||
Returns
|
||||
-------
|
||||
mono_audio : bytes
|
||||
mixed down audio data.
|
||||
"""
|
||||
all_channels = array_(fmt, data)
|
||||
mono_channels = [
|
||||
array_(fmt, all_channels[ch::channels]) for ch in range(channels)
|
||||
]
|
||||
avg_arr = array_(
|
||||
fmt,
|
||||
(round(sum(samples) / channels) for samples in zip(*mono_channels)),
|
||||
)
|
||||
return avg_arr
|
||||
|
||||
|
||||
def compute_average_channel_stereo(data, sample_width):
|
||||
"""Compute and return average channel of stereo audio data. This function
|
||||
should be used when the number of channels is exactly 2 because in that
|
||||
case we can use standard `audioop` module which *much* faster then calling
|
||||
:func:`compute_average_channel`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
data : bytes
|
||||
2-channel audio data to mix down.
|
||||
sample_width : int
|
||||
size in bytes of one audio sample (one channel considered).
|
||||
|
||||
Returns
|
||||
-------
|
||||
mono_audio : bytes
|
||||
mixed down audio data.
|
||||
"""
|
||||
fmt = FORMAT[sample_width]
|
||||
arr = array_(fmt, audioop.tomono(data, sample_width, 0.5, 0.5))
|
||||
return arr
|
||||
|
||||
|
||||
def separate_channels(data, fmt, channels):
|
||||
"""Create a list of arrays of audio samples (`array.array` objects), one for
|
||||
each channel.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
data : bytes
|
||||
multi-channel audio data to mix down.
|
||||
fmt : str
|
||||
format (single character) to pass to `array.array` to convert `data`
|
||||
into an array of samples. This should be "b" if audio data's sample width
|
||||
is 1, "h" if it's 2 and "i" if it's 4.
|
||||
channels : int
|
||||
number of channels of audio data.
|
||||
|
||||
Returns
|
||||
-------
|
||||
channels_arr : list
|
||||
list of audio channels, each as a standard `array.array`.
|
||||
"""
|
||||
all_channels = array_(fmt, data)
|
||||
mono_channels = [
|
||||
array_(fmt, all_channels[ch::channels]) for ch in range(channels)
|
||||
]
|
||||
return mono_channels
|
||||
|
||||
|
||||
def calculate_energy_single_channel(data, sample_width):
|
||||
"""Calculate the energy of mono audio data. Energy is computed as:
|
||||
|
||||
.. math:: energy = 20 \log(\sqrt({1}/{N}\sum_{i}^{N}{a_i}^2)) % # noqa: W605
|
||||
|
||||
where `a_i` is the i-th audio sample and `N` is the number of audio samples
|
||||
in data.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
data : bytes
|
||||
single-channel audio data.
|
||||
sample_width : int
|
||||
size in bytes of one audio sample.
|
||||
|
||||
Returns
|
||||
-------
|
||||
energy : float
|
||||
energy of audio signal.
|
||||
"""
|
||||
energy_sqrt = max(audioop.rms(data, sample_width), _EPSILON)
|
||||
return 20 * math.log10(energy_sqrt)
|
||||
|
||||
|
||||
def calculate_energy_multichannel(x, sample_width, aggregation_fn=max):
|
||||
"""Calculate the energy of multi-channel audio data. Energy is calculated
|
||||
channel-wise. An aggregation function is applied to the resulting energies
|
||||
(default: `max`). Also see :func:`calculate_energy_single_channel`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
data : bytes
|
||||
single-channel audio data.
|
||||
sample_width : int
|
||||
size in bytes of one audio sample (one channel considered).
|
||||
aggregation_fn : callable, default: max
|
||||
aggregation function to apply to the resulting per-channel energies.
|
||||
|
||||
Returns
|
||||
-------
|
||||
energy : float
|
||||
aggregated energy of multi-channel audio signal.
|
||||
"""
|
||||
energies = (calculate_energy_single_channel(xi, sample_width) for xi in x)
|
||||
return aggregation_fn(energies)
|
|
@ -1,30 +0,0 @@
|
|||
import numpy as np
|
||||
from .signal import (
|
||||
compute_average_channel_stereo,
|
||||
calculate_energy_single_channel,
|
||||
calculate_energy_multichannel,
|
||||
)
|
||||
|
||||
FORMAT = {1: np.int8, 2: np.int16, 4: np.int32}
|
||||
|
||||
|
||||
def to_array(data, sample_width, channels):
|
||||
fmt = FORMAT[sample_width]
|
||||
if channels == 1:
|
||||
return np.frombuffer(data, dtype=fmt).astype(np.float64)
|
||||
return separate_channels(data, fmt, channels).astype(np.float64)
|
||||
|
||||
|
||||
def extract_single_channel(data, fmt, channels, selected):
|
||||
samples = np.frombuffer(data, dtype=fmt)
|
||||
return np.asanyarray(samples[selected::channels], order="C")
|
||||
|
||||
|
||||
def compute_average_channel(data, fmt, channels):
|
||||
array = np.frombuffer(data, dtype=fmt).astype(np.float64)
|
||||
return array.reshape(-1, channels).mean(axis=1).round().astype(fmt)
|
||||
|
||||
|
||||
def separate_channels(data, fmt, channels):
|
||||
array = np.frombuffer(data, dtype=fmt)
|
||||
return np.asanyarray(array.reshape(-1, channels).T, order="C")
|
1736
libs/auditok/util.py
1736
libs/auditok/util.py
File diff suppressed because it is too large
Load diff
|
@ -1,427 +0,0 @@
|
|||
import os
|
||||
import sys
|
||||
from tempfile import NamedTemporaryFile
|
||||
from abc import ABCMeta, abstractmethod
|
||||
from threading import Thread
|
||||
from datetime import datetime, timedelta
|
||||
from collections import namedtuple
|
||||
import wave
|
||||
import subprocess
|
||||
from queue import Queue, Empty
|
||||
from .io import _guess_audio_format
|
||||
from .util import AudioDataSource, make_duration_formatter
|
||||
from .core import split
|
||||
from .exceptions import (
|
||||
EndOfProcessing,
|
||||
AudioEncodingError,
|
||||
AudioEncodingWarning,
|
||||
)
|
||||
|
||||
|
||||
_STOP_PROCESSING = "STOP_PROCESSING"
|
||||
_Detection = namedtuple("_Detection", "id start end duration")
|
||||
|
||||
|
||||
def _run_subprocess(command):
|
||||
try:
|
||||
with subprocess.Popen(
|
||||
command,
|
||||
stdin=open(os.devnull, "rb"),
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
) as proc:
|
||||
stdout, stderr = proc.communicate()
|
||||
return proc.returncode, stdout, stderr
|
||||
except Exception:
|
||||
err_msg = "Couldn't export audio using command: '{}'".format(command)
|
||||
raise AudioEncodingError(err_msg)
|
||||
|
||||
|
||||
class Worker(Thread, metaclass=ABCMeta):
|
||||
def __init__(self, timeout=0.5, logger=None):
|
||||
self._timeout = timeout
|
||||
self._logger = logger
|
||||
self._inbox = Queue()
|
||||
Thread.__init__(self)
|
||||
|
||||
def run(self):
|
||||
while True:
|
||||
message = self._get_message()
|
||||
if message == _STOP_PROCESSING:
|
||||
break
|
||||
if message is not None:
|
||||
self._process_message(message)
|
||||
self._post_process()
|
||||
|
||||
@abstractmethod
|
||||
def _process_message(self, message):
|
||||
"""Process incoming messages"""
|
||||
|
||||
def _post_process(self):
|
||||
pass
|
||||
|
||||
def _log(self, message):
|
||||
self._logger.info(message)
|
||||
|
||||
def _stop_requested(self):
|
||||
try:
|
||||
message = self._inbox.get_nowait()
|
||||
if message == _STOP_PROCESSING:
|
||||
return True
|
||||
except Empty:
|
||||
return False
|
||||
|
||||
def stop(self):
|
||||
self.send(_STOP_PROCESSING)
|
||||
self.join()
|
||||
|
||||
def send(self, message):
|
||||
self._inbox.put(message)
|
||||
|
||||
def _get_message(self):
|
||||
try:
|
||||
message = self._inbox.get(timeout=self._timeout)
|
||||
return message
|
||||
except Empty:
|
||||
return None
|
||||
|
||||
|
||||
class TokenizerWorker(Worker, AudioDataSource):
|
||||
def __init__(self, reader, observers=None, logger=None, **kwargs):
|
||||
self._observers = observers if observers is not None else []
|
||||
self._reader = reader
|
||||
self._audio_region_gen = split(self, **kwargs)
|
||||
self._detections = []
|
||||
self._log_format = "[DET]: Detection {0.id} (start: {0.start:.3f}, "
|
||||
self._log_format += "end: {0.end:.3f}, duration: {0.duration:.3f})"
|
||||
Worker.__init__(self, timeout=0.2, logger=logger)
|
||||
|
||||
def _process_message(self):
|
||||
pass
|
||||
|
||||
@property
|
||||
def detections(self):
|
||||
return self._detections
|
||||
|
||||
def _notify_observers(self, message):
|
||||
for observer in self._observers:
|
||||
observer.send(message)
|
||||
|
||||
def run(self):
|
||||
self._reader.open()
|
||||
start_processing_timestamp = datetime.now()
|
||||
for _id, audio_region in enumerate(self._audio_region_gen, start=1):
|
||||
timestamp = start_processing_timestamp + timedelta(
|
||||
seconds=audio_region.meta.start
|
||||
)
|
||||
audio_region.meta.timestamp = timestamp
|
||||
detection = _Detection(
|
||||
_id,
|
||||
audio_region.meta.start,
|
||||
audio_region.meta.end,
|
||||
audio_region.duration,
|
||||
)
|
||||
self._detections.append(detection)
|
||||
if self._logger is not None:
|
||||
message = self._log_format.format(detection)
|
||||
self._log(message)
|
||||
self._notify_observers((_id, audio_region))
|
||||
self._notify_observers(_STOP_PROCESSING)
|
||||
self._reader.close()
|
||||
|
||||
def start_all(self):
|
||||
for observer in self._observers:
|
||||
observer.start()
|
||||
self.start()
|
||||
|
||||
def stop_all(self):
|
||||
self.stop()
|
||||
for observer in self._observers:
|
||||
observer.stop()
|
||||
self._reader.close()
|
||||
|
||||
def read(self):
|
||||
if self._stop_requested():
|
||||
return None
|
||||
else:
|
||||
return self._reader.read()
|
||||
|
||||
def __getattr__(self, name):
|
||||
return getattr(self._reader, name)
|
||||
|
||||
|
||||
class StreamSaverWorker(Worker):
|
||||
def __init__(
|
||||
self,
|
||||
audio_reader,
|
||||
filename,
|
||||
export_format=None,
|
||||
cache_size_sec=0.5,
|
||||
timeout=0.2,
|
||||
):
|
||||
self._reader = audio_reader
|
||||
sample_size_bytes = self._reader.sw * self._reader.ch
|
||||
self._cache_size = cache_size_sec * self._reader.sr * sample_size_bytes
|
||||
self._output_filename = filename
|
||||
self._export_format = _guess_audio_format(export_format, filename)
|
||||
if self._export_format is None:
|
||||
self._export_format = "wav"
|
||||
self._init_output_stream()
|
||||
self._exported = False
|
||||
self._cache = []
|
||||
self._total_cached = 0
|
||||
Worker.__init__(self, timeout=timeout)
|
||||
|
||||
def _get_non_existent_filename(self):
|
||||
filename = self._output_filename + ".wav"
|
||||
i = 0
|
||||
while os.path.exists(filename):
|
||||
i += 1
|
||||
filename = self._output_filename + "({}).wav".format(i)
|
||||
return filename
|
||||
|
||||
def _init_output_stream(self):
|
||||
if self._export_format != "wav":
|
||||
self._tmp_output_filename = self._get_non_existent_filename()
|
||||
else:
|
||||
self._tmp_output_filename = self._output_filename
|
||||
self._wfp = wave.open(self._tmp_output_filename, "wb")
|
||||
self._wfp.setframerate(self._reader.sr)
|
||||
self._wfp.setsampwidth(self._reader.sw)
|
||||
self._wfp.setnchannels(self._reader.ch)
|
||||
|
||||
@property
|
||||
def sr(self):
|
||||
return self._reader.sampling_rate
|
||||
|
||||
@property
|
||||
def sw(self):
|
||||
return self._reader.sample_width
|
||||
|
||||
@property
|
||||
def ch(self):
|
||||
return self._reader.channels
|
||||
|
||||
def __del__(self):
|
||||
self._post_process()
|
||||
|
||||
if (
|
||||
(self._tmp_output_filename != self._output_filename)
|
||||
and self._exported
|
||||
and os.path.exists(self._tmp_output_filename)
|
||||
):
|
||||
os.remove(self._tmp_output_filename)
|
||||
|
||||
def _process_message(self, data):
|
||||
self._cache.append(data)
|
||||
self._total_cached += len(data)
|
||||
if self._total_cached >= self._cache_size:
|
||||
self._write_cached_data()
|
||||
|
||||
def _post_process(self):
|
||||
while True:
|
||||
try:
|
||||
data = self._inbox.get_nowait()
|
||||
if data != _STOP_PROCESSING:
|
||||
self._cache.append(data)
|
||||
self._total_cached += len(data)
|
||||
except Empty:
|
||||
break
|
||||
self._write_cached_data()
|
||||
self._wfp.close()
|
||||
|
||||
def _write_cached_data(self):
|
||||
if self._cache:
|
||||
data = b"".join(self._cache)
|
||||
self._wfp.writeframes(data)
|
||||
self._cache = []
|
||||
self._total_cached = 0
|
||||
|
||||
def open(self):
|
||||
self._reader.open()
|
||||
|
||||
def close(self):
|
||||
self._reader.close()
|
||||
self.stop()
|
||||
|
||||
def rewind(self):
|
||||
# ensure compatibility with AudioDataSource with record=True
|
||||
pass
|
||||
|
||||
@property
|
||||
def data(self):
|
||||
with wave.open(self._tmp_output_filename, "rb") as wfp:
|
||||
return wfp.readframes(-1)
|
||||
|
||||
def save_stream(self):
|
||||
if self._exported:
|
||||
return self._output_filename
|
||||
|
||||
if self._export_format in ("raw", "wav"):
|
||||
if self._export_format == "raw":
|
||||
self._export_raw()
|
||||
self._exported = True
|
||||
return self._output_filename
|
||||
try:
|
||||
self._export_with_ffmpeg_or_avconv()
|
||||
except AudioEncodingError:
|
||||
try:
|
||||
self._export_with_sox()
|
||||
except AudioEncodingError:
|
||||
warn_msg = "Couldn't save audio data in the desired format "
|
||||
warn_msg += "'{}'. Either none of 'ffmpeg', 'avconv' or 'sox' "
|
||||
warn_msg += "is installed or this format is not recognized.\n"
|
||||
warn_msg += "Audio file was saved as '{}'"
|
||||
raise AudioEncodingWarning(
|
||||
warn_msg.format(
|
||||
self._export_format, self._tmp_output_filename
|
||||
)
|
||||
)
|
||||
finally:
|
||||
self._exported = True
|
||||
return self._output_filename
|
||||
|
||||
def _export_raw(self):
|
||||
with open(self._output_filename, "wb") as wfp:
|
||||
wfp.write(self.data)
|
||||
|
||||
def _export_with_ffmpeg_or_avconv(self):
|
||||
command = [
|
||||
"-y",
|
||||
"-f",
|
||||
"wav",
|
||||
"-i",
|
||||
self._tmp_output_filename,
|
||||
"-f",
|
||||
self._export_format,
|
||||
self._output_filename,
|
||||
]
|
||||
returncode, stdout, stderr = _run_subprocess(["ffmpeg"] + command)
|
||||
if returncode != 0:
|
||||
returncode, stdout, stderr = _run_subprocess(["avconv"] + command)
|
||||
if returncode != 0:
|
||||
raise AudioEncodingError(stderr)
|
||||
return stdout, stderr
|
||||
|
||||
def _export_with_sox(self):
|
||||
command = [
|
||||
"sox",
|
||||
"-t",
|
||||
"wav",
|
||||
self._tmp_output_filename,
|
||||
self._output_filename,
|
||||
]
|
||||
returncode, stdout, stderr = _run_subprocess(command)
|
||||
if returncode != 0:
|
||||
raise AudioEncodingError(stderr)
|
||||
return stdout, stderr
|
||||
|
||||
def close_output(self):
|
||||
self._wfp.close()
|
||||
|
||||
def read(self):
|
||||
data = self._reader.read()
|
||||
if data is not None:
|
||||
self.send(data)
|
||||
else:
|
||||
self.send(_STOP_PROCESSING)
|
||||
return data
|
||||
|
||||
def __getattr__(self, name):
|
||||
if name == "data":
|
||||
return self.data
|
||||
return getattr(self._reader, name)
|
||||
|
||||
|
||||
class PlayerWorker(Worker):
|
||||
def __init__(self, player, progress_bar=False, timeout=0.2, logger=None):
|
||||
self._player = player
|
||||
self._progress_bar = progress_bar
|
||||
self._log_format = "[PLAY]: Detection {id} played"
|
||||
Worker.__init__(self, timeout=timeout, logger=logger)
|
||||
|
||||
def _process_message(self, message):
|
||||
_id, audio_region = message
|
||||
if self._logger is not None:
|
||||
message = self._log_format.format(id=_id)
|
||||
self._log(message)
|
||||
audio_region.play(
|
||||
player=self._player, progress_bar=self._progress_bar, leave=False
|
||||
)
|
||||
|
||||
|
||||
class RegionSaverWorker(Worker):
|
||||
def __init__(
|
||||
self,
|
||||
filename_format,
|
||||
audio_format=None,
|
||||
timeout=0.2,
|
||||
logger=None,
|
||||
**audio_parameters
|
||||
):
|
||||
self._filename_format = filename_format
|
||||
self._audio_format = audio_format
|
||||
self._audio_parameters = audio_parameters
|
||||
self._debug_format = "[SAVE]: Detection {id} saved as '{filename}'"
|
||||
Worker.__init__(self, timeout=timeout, logger=logger)
|
||||
|
||||
def _process_message(self, message):
|
||||
_id, audio_region = message
|
||||
filename = self._filename_format.format(
|
||||
id=_id,
|
||||
start=audio_region.meta.start,
|
||||
end=audio_region.meta.end,
|
||||
duration=audio_region.duration,
|
||||
)
|
||||
filename = audio_region.save(
|
||||
filename, self._audio_format, **self._audio_parameters
|
||||
)
|
||||
if self._logger:
|
||||
message = self._debug_format.format(id=_id, filename=filename)
|
||||
self._log(message)
|
||||
|
||||
|
||||
class CommandLineWorker(Worker):
|
||||
def __init__(self, command, timeout=0.2, logger=None):
|
||||
self._command = command
|
||||
Worker.__init__(self, timeout=timeout, logger=logger)
|
||||
self._debug_format = "[COMMAND]: Detection {id} command: '{command}'"
|
||||
|
||||
def _process_message(self, message):
|
||||
_id, audio_region = message
|
||||
with NamedTemporaryFile(delete=False) as file:
|
||||
filename = audio_region.save(file.name, audio_format="wav")
|
||||
command = self._command.format(file=filename)
|
||||
os.system(command)
|
||||
if self._logger is not None:
|
||||
message = self._debug_format.format(id=_id, command=command)
|
||||
self._log(message)
|
||||
|
||||
|
||||
class PrintWorker(Worker):
|
||||
def __init__(
|
||||
self,
|
||||
print_format="{start} {end}",
|
||||
time_format="%S",
|
||||
timestamp_format="%Y/%m/%d %H:%M:%S.%f",
|
||||
timeout=0.2,
|
||||
):
|
||||
|
||||
self._print_format = print_format
|
||||
self._format_time = make_duration_formatter(time_format)
|
||||
self._timestamp_format = timestamp_format
|
||||
self.detections = []
|
||||
Worker.__init__(self, timeout=timeout)
|
||||
|
||||
def _process_message(self, message):
|
||||
_id, audio_region = message
|
||||
timestamp = audio_region.meta.timestamp
|
||||
timestamp = timestamp.strftime(self._timestamp_format)
|
||||
text = self._print_format.format(
|
||||
id=_id,
|
||||
start=self._format_time(audio_region.meta.start),
|
||||
end=self._format_time(audio_region.meta.end),
|
||||
duration=self._format_time(audio_region.duration),
|
||||
timestamp=timestamp,
|
||||
)
|
||||
print(text)
|
|
@ -34,7 +34,7 @@ urllib3=1.23
|
|||
Waitress=1.4.3
|
||||
|
||||
## indirect dependencies
|
||||
auditok=0.2.0 # Required-by: ffsubsync
|
||||
auditok=0.1.5 # Required-by: ffsubsync
|
||||
rich=10.1.0 # Required-by: ffsubsync
|
||||
srt=3.4.1 # Required-by: ffsubsync
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue