parent 63f1e63fa8
commit c23ce4a4d1
@@ -1,126 +0,0 @@
import sys
import logging
from collections import namedtuple
from . import workers
from .util import AudioDataSource
from .io import player_for

_AUDITOK_LOGGER = "AUDITOK_LOGGER"
KeywordArguments = namedtuple(
    "KeywordArguments", ["io", "split", "miscellaneous"]
)


def make_kwargs(args_ns):
    if args_ns.save_stream is None:
        record = args_ns.plot or (args_ns.save_image is not None)
    else:
        record = False
    try:
        use_channel = int(args_ns.use_channel)
    except (ValueError, TypeError):
        use_channel = args_ns.use_channel

    io_kwargs = {
        "input": args_ns.input,
        "audio_format": args_ns.input_format,
        "max_read": args_ns.max_read,
        "block_dur": args_ns.analysis_window,
        "sampling_rate": args_ns.sampling_rate,
        "sample_width": args_ns.sample_width,
        "channels": args_ns.channels,
        "use_channel": use_channel,
        "save_stream": args_ns.save_stream,
        "save_detections_as": args_ns.save_detections_as,
        "export_format": args_ns.output_format,
        "large_file": args_ns.large_file,
        "frames_per_buffer": args_ns.frame_per_buffer,
        "input_device_index": args_ns.input_device_index,
        "record": record,
    }

    split_kwargs = {
        "min_dur": args_ns.min_duration,
        "max_dur": args_ns.max_duration,
        "max_silence": args_ns.max_silence,
        "drop_trailing_silence": args_ns.drop_trailing_silence,
        "strict_min_dur": args_ns.strict_min_duration,
        "energy_threshold": args_ns.energy_threshold,
    }

    miscellaneous = {
        "echo": args_ns.echo,
        "progress_bar": args_ns.progress_bar,
        "command": args_ns.command,
        "quiet": args_ns.quiet,
        "printf": args_ns.printf,
        "time_format": args_ns.time_format,
        "timestamp_format": args_ns.timestamp_format,
    }
    return KeywordArguments(io_kwargs, split_kwargs, miscellaneous)


def make_logger(stderr=False, file=None, name=_AUDITOK_LOGGER):
    if not stderr and file is None:
        return None
    logger = logging.getLogger(name)
    logger.setLevel(logging.INFO)
    if stderr:
        handler = logging.StreamHandler(sys.stderr)
        handler.setLevel(logging.INFO)
        logger.addHandler(handler)

    if file is not None:
        handler = logging.FileHandler(file, "w")
        fmt = logging.Formatter("[%(asctime)s] | %(message)s")
        handler.setFormatter(fmt)
        handler.setLevel(logging.INFO)
        logger.addHandler(handler)
    return logger


def initialize_workers(logger=None, **kwargs):
    observers = []
    reader = AudioDataSource(source=kwargs["input"], **kwargs)
    if kwargs["save_stream"] is not None:
        reader = workers.StreamSaverWorker(
            reader,
            filename=kwargs["save_stream"],
            export_format=kwargs["export_format"],
        )
        reader.start()

    if kwargs["save_detections_as"] is not None:
        worker = workers.RegionSaverWorker(
            kwargs["save_detections_as"],
            kwargs["export_format"],
            logger=logger,
        )
        observers.append(worker)

    if kwargs["echo"]:
        player = player_for(reader)
        worker = workers.PlayerWorker(
            player, progress_bar=kwargs["progress_bar"], logger=logger
        )
        observers.append(worker)

    if kwargs["command"] is not None:
        worker = workers.CommandLineWorker(
            command=kwargs["command"], logger=logger
        )
        observers.append(worker)

    if not kwargs["quiet"]:
        print_format = (
            kwargs["printf"]
            .replace("\\n", "\n")
            .replace("\\t", "\t")
            .replace("\\r", "\r")
        )
        worker = workers.PrintWorker(
            print_format, kwargs["time_format"], kwargs["timestamp_format"]
        )
        observers.append(worker)

    return reader, observers
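For context, a minimal sketch of how these three helpers fit together in a CLI entry point. `parse_args` and the exact routing of the keyword groups are assumptions, not part of this diff; only `make_kwargs`, `make_logger` and `initialize_workers` come from the code above.

# Hypothetical driver; argument parsing is elided.
def main(argv=None):
    args_ns = parse_args(argv)  # assumed argparse.Namespace with the attributes read above
    kwargs = make_kwargs(args_ns)
    logger = make_logger(stderr=True)
    reader, observers = initialize_workers(
        logger=logger, **kwargs.io, **kwargs.miscellaneous
    )
    # reader and observers are then handed to workers.TokenizerWorker
    # together with kwargs.split (see the workers module further below).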
@@ -1,31 +1,19 @@
 """
-This module contains links to audio files that can be used for test purposes.
-
-.. autosummary::
-    :toctree: generated/
-
-    one_to_six_arabic_16000_mono_bc_noise
-    was_der_mensch_saet_mono_44100_lead_trail_silence
+This module contains links to audio files you can use for test purposes.
 """
 
 import os
 
-__all__ = [
-    "one_to_six_arabic_16000_mono_bc_noise",
-    "was_der_mensch_saet_mono_44100_lead_trail_silence",
-]
+__all__ = ["one_to_six_arabic_16000_mono_bc_noise", "was_der_mensch_saet_mono_44100_lead_trail_silence"]
 
 _current_dir = os.path.dirname(os.path.realpath(__file__))
 
 one_to_six_arabic_16000_mono_bc_noise = "{cd}{sep}data{sep}1to6arabic_\
-16000_mono_bc_noise.wav".format(
-    cd=_current_dir, sep=os.path.sep
-)
+16000_mono_bc_noise.wav".format(cd=_current_dir, sep=os.path.sep)
 """A wave file that contains a pronunciation of Arabic numbers from 1 to 6"""
 
+
 was_der_mensch_saet_mono_44100_lead_trail_silence = "{cd}{sep}data{sep}was_\
 der_mensch_saet_das_wird_er_vielfach_ernten_44100Hz_mono_lead_trail_\
-silence.wav".format(
-    cd=_current_dir, sep=os.path.sep
-)
-"""A wave file that contains a sentence with a long leading and trailing silence"""
+silence.wav".format(cd=_current_dir, sep=os.path.sep)
+""" A wave file that contains a sentence between long leading and trailing periods of silence"""
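For reference, a small sketch of how these bundled files are typically used in tests or examples; the `auditok.dataset` import path and the `split` defaults are assumptions.

# Run auditok's tokenizer over one of the bundled test files.
import auditok
from auditok import dataset  # module path assumed

regions = auditok.split(dataset.one_to_six_arabic_16000_mono_bc_noise)
for i, region in enumerate(regions, start=1):
    print(i, round(region.duration, 3))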
@@ -1,150 +0,0 @@
import matplotlib.pyplot as plt
import numpy as np

AUDITOK_PLOT_THEME = {
    "figure": {"facecolor": "#482a36", "alpha": 0.2},
    "plot": {"facecolor": "#282a36"},
    "energy_threshold": {
        "color": "#e31f8f",
        "linestyle": "--",
        "linewidth": 1,
    },
    "signal": {"color": "#40d970", "linestyle": "-", "linewidth": 1},
    "detections": {
        "facecolor": "#777777",
        "edgecolor": "#ff8c1a",
        "linewidth": 1,
        "alpha": 0.75,
    },
}


def _make_time_axis(nb_samples, sampling_rate):
    sample_duration = 1 / sampling_rate
    x = np.linspace(0, sample_duration * (nb_samples - 1), nb_samples)
    return x


def _plot_line(x, y, theme, xlabel=None, ylabel=None, **kwargs):
    color = theme.get("color", theme.get("c"))
    ls = theme.get("linestyle", theme.get("ls"))
    lw = theme.get("linewidth", theme.get("lw"))
    plt.plot(x, y, c=color, ls=ls, lw=lw, **kwargs)
    plt.xlabel(xlabel, fontsize=8)
    plt.ylabel(ylabel, fontsize=8)


def _plot_detections(subplot, detections, theme):
    fc = theme.get("facecolor", theme.get("fc"))
    ec = theme.get("edgecolor", theme.get("ec"))
    ls = theme.get("linestyle", theme.get("ls"))
    lw = theme.get("linewidth", theme.get("lw"))
    alpha = theme.get("alpha")
    for (start, end) in detections:
        subplot.axvspan(start, end, fc=fc, ec=ec, ls=ls, lw=lw, alpha=alpha)


def plot(
    audio_region,
    scale_signal=True,
    detections=None,
    energy_threshold=None,
    show=True,
    figsize=None,
    save_as=None,
    dpi=120,
    theme="auditok",
):
    y = np.asarray(audio_region)
    if len(y.shape) == 1:
        y = y.reshape(1, -1)
    nb_subplots, nb_samples = y.shape
    sampling_rate = audio_region.sampling_rate
    time_axis = _make_time_axis(nb_samples, sampling_rate)
    if energy_threshold is not None:
        eth_log10 = energy_threshold * np.log(10) / 10
        amplitude_threshold = np.sqrt(np.exp(eth_log10))
    else:
        amplitude_threshold = None
    if detections is None:
        detections = []
    else:
        # End of detection corresponds to the end of the last sample, but
        # to stay compatible with the time axis of signal plotting we want end
        # of detection to correspond to the *start* of that last sample.
        detections = [
            (start, end - (1 / sampling_rate)) for (start, end) in detections
        ]
    if theme == "auditok":
        theme = AUDITOK_PLOT_THEME

    fig = plt.figure(figsize=figsize, dpi=dpi)
    fig_theme = theme.get("figure", theme.get("fig", {}))
    fig_fc = fig_theme.get("facecolor", fig_theme.get("ffc"))
    fig_alpha = fig_theme.get("alpha", 1)
    fig.patch.set_facecolor(fig_fc)
    fig.patch.set_alpha(fig_alpha)

    plot_theme = theme.get("plot", {})
    plot_fc = plot_theme.get("facecolor", plot_theme.get("pfc"))

    if nb_subplots > 2 and nb_subplots % 2 == 0:
        nb_rows = nb_subplots // 2
        nb_columns = 2
    else:
        nb_rows = nb_subplots
        nb_columns = 1

    for sid, samples in enumerate(y, 1):
        ax = fig.add_subplot(nb_rows, nb_columns, sid)
        ax.set_facecolor(plot_fc)
        if scale_signal:
            std = samples.std()
            if std > 0:
                mean = samples.mean()
                std = samples.std()
                samples = (samples - mean) / std
                max_ = samples.max()
                plt.ylim(-1.5 * max_, 1.5 * max_)
        if amplitude_threshold is not None:
            if scale_signal and std > 0:
                amp_th = (amplitude_threshold - mean) / std
            else:
                amp_th = amplitude_threshold
            eth_theme = theme.get("energy_threshold", theme.get("eth", {}))
            _plot_line(
                [time_axis[0], time_axis[-1]],
                [amp_th] * 2,
                eth_theme,
                label="Detection threshold",
            )
            if sid == 1:
                legend = plt.legend(
                    ["Detection threshold"],
                    facecolor=fig_fc,
                    framealpha=0.1,
                    bbox_to_anchor=(0.0, 1.15, 1.0, 0.102),
                    loc=2,
                )
                legend = plt.gca().add_artist(legend)

        signal_theme = theme.get("signal", {})
        _plot_line(
            time_axis,
            samples,
            signal_theme,
            xlabel="Time (seconds)",
            ylabel="Signal{}".format(" (scaled)" if scale_signal else ""),
        )
        detections_theme = theme.get("detections", {})
        _plot_detections(ax, detections, detections_theme)
        plt.title("Channel {}".format(sid), fontsize=10)

    plt.xticks(fontsize=8)
    plt.yticks(fontsize=8)
    plt.tight_layout()

    if save_as is not None:
        plt.savefig(save_as, dpi=dpi)
    if show:
        plt.show()
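A usage sketch for the plot helper above; the file name and the detection intervals are made up. Note that energy_threshold is a dB value converted internally to a linear amplitude, since sqrt(exp(E * ln10 / 10)) = 10**(E / 20).

# Hypothetical call to the plotting helper defined above.
import auditok

region = auditok.load("audio.wav")     # any mono or multichannel wav file
detections = [(0.5, 1.2), (2.0, 2.8)]  # (start, end) pairs in seconds
plot(
    region,
    detections=detections,
    energy_threshold=50,   # dB, drawn as a horizontal dashed line
    show=False,
    save_as="detections.png",
)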
@@ -1,179 +0,0 @@
"""
Module for basic audio signal processing and array operations.

.. autosummary::
    :toctree: generated/

    to_array
    extract_single_channel
    compute_average_channel
    compute_average_channel_stereo
    separate_channels
    calculate_energy_single_channel
    calculate_energy_multichannel
"""
from array import array as array_
import audioop
import math

FORMAT = {1: "b", 2: "h", 4: "i"}
_EPSILON = 1e-10


def to_array(data, sample_width, channels):
    """Extract individual channels of audio data and return a list of arrays of
    numeric samples. This will always return a list of `array.array` objects
    (one per channel) even if audio data is mono.

    Parameters
    ----------
    data : bytes
        raw audio data.
    sample_width : int
        size in bytes of one audio sample (one channel considered).
    channels : int
        number of channels of audio data.

    Returns
    -------
    samples_arrays : list
        list of arrays of audio samples.
    """
    fmt = FORMAT[sample_width]
    if channels == 1:
        return [array_(fmt, data)]
    return separate_channels(data, fmt, channels)


def extract_single_channel(data, fmt, channels, selected):
    samples = array_(fmt, data)
    return samples[selected::channels]


def compute_average_channel(data, fmt, channels):
    """
    Compute and return average channel of multi-channel audio data. If the
    number of channels is 2, use :func:`compute_average_channel_stereo` (much
    faster). This function uses the standard `array` module to convert `bytes`
    data into an array of numeric values.

    Parameters
    ----------
    data : bytes
        multi-channel audio data to mix down.
    fmt : str
        format (single character) to pass to `array.array` to convert `data`
        into an array of samples. This should be "b" if audio data's sample
        width is 1, "h" if it's 2 and "i" if it's 4.
    channels : int
        number of channels of audio data.

    Returns
    -------
    mono_audio : bytes
        mixed down audio data.
    """
    all_channels = array_(fmt, data)
    mono_channels = [
        array_(fmt, all_channels[ch::channels]) for ch in range(channels)
    ]
    avg_arr = array_(
        fmt,
        (round(sum(samples) / channels) for samples in zip(*mono_channels)),
    )
    return avg_arr


def compute_average_channel_stereo(data, sample_width):
    """Compute and return average channel of stereo audio data. This function
    should be used when the number of channels is exactly 2 because in that
    case we can use the standard `audioop` module, which is *much* faster than
    calling :func:`compute_average_channel`.

    Parameters
    ----------
    data : bytes
        2-channel audio data to mix down.
    sample_width : int
        size in bytes of one audio sample (one channel considered).

    Returns
    -------
    mono_audio : bytes
        mixed down audio data.
    """
    fmt = FORMAT[sample_width]
    arr = array_(fmt, audioop.tomono(data, sample_width, 0.5, 0.5))
    return arr


def separate_channels(data, fmt, channels):
    """Create a list of arrays of audio samples (`array.array` objects), one for
    each channel.

    Parameters
    ----------
    data : bytes
        multi-channel audio data to split into separate channels.
    fmt : str
        format (single character) to pass to `array.array` to convert `data`
        into an array of samples. This should be "b" if audio data's sample
        width is 1, "h" if it's 2 and "i" if it's 4.
    channels : int
        number of channels of audio data.

    Returns
    -------
    channels_arr : list
        list of audio channels, each as a standard `array.array`.
    """
    all_channels = array_(fmt, data)
    mono_channels = [
        array_(fmt, all_channels[ch::channels]) for ch in range(channels)
    ]
    return mono_channels


def calculate_energy_single_channel(data, sample_width):
    """Calculate the energy of mono audio data. Energy is computed as:

    .. math:: energy = 20 \log_{10}\left(\sqrt{\frac{1}{N}\sum_{i=1}^{N}{a_i}^2}\right) % # noqa: W605

    where `a_i` is the i-th audio sample and `N` is the number of audio samples
    in data.

    Parameters
    ----------
    data : bytes
        single-channel audio data.
    sample_width : int
        size in bytes of one audio sample.

    Returns
    -------
    energy : float
        energy of audio signal.
    """
    energy_sqrt = max(audioop.rms(data, sample_width), _EPSILON)
    return 20 * math.log10(energy_sqrt)


def calculate_energy_multichannel(x, sample_width, aggregation_fn=max):
    """Calculate the energy of multi-channel audio data. Energy is calculated
    channel-wise. An aggregation function is applied to the resulting energies
    (default: `max`). Also see :func:`calculate_energy_single_channel`.

    Parameters
    ----------
    x : iterable
        multi-channel audio data, one `bytes` object per channel.
    sample_width : int
        size in bytes of one audio sample (one channel considered).
    aggregation_fn : callable, default: max
        aggregation function to apply to the resulting per-channel energies.

    Returns
    -------
    energy : float
        aggregated energy of multi-channel audio signal.
    """
    energies = (calculate_energy_single_channel(xi, sample_width) for xi in x)
    return aggregation_fn(energies)
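A quick self-contained check of the energy helper above (the `auditok.signal` import path is assumed): a 440 Hz, 16-bit sine of amplitude 10000 has an RMS of roughly 7071, so the reported energy should be close to 20 * log10(7071), about 77 dB.

import math
from array import array

from auditok.signal import calculate_energy_single_channel  # import path assumed

sr, sample_width = 16000, 2
samples = array(
    "h",
    (int(10000 * math.sin(2 * math.pi * 440 * t / sr)) for t in range(sr)),
)
energy = calculate_energy_single_channel(samples.tobytes(), sample_width)
print(round(energy, 2))  # ~76.99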
@@ -1,30 +0,0 @@
import numpy as np
from .signal import (
    compute_average_channel_stereo,
    calculate_energy_single_channel,
    calculate_energy_multichannel,
)

FORMAT = {1: np.int8, 2: np.int16, 4: np.int32}


def to_array(data, sample_width, channels):
    fmt = FORMAT[sample_width]
    if channels == 1:
        return np.frombuffer(data, dtype=fmt).astype(np.float64)
    return separate_channels(data, fmt, channels).astype(np.float64)


def extract_single_channel(data, fmt, channels, selected):
    samples = np.frombuffer(data, dtype=fmt)
    return np.asanyarray(samples[selected::channels], order="C")


def compute_average_channel(data, fmt, channels):
    array = np.frombuffer(data, dtype=fmt).astype(np.float64)
    return array.reshape(-1, channels).mean(axis=1).round().astype(fmt)


def separate_channels(data, fmt, channels):
    array = np.frombuffer(data, dtype=fmt)
    return np.asanyarray(array.reshape(-1, channels).T, order="C")
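A small sketch of what the numpy-based separate_channels above does with an interleaved stereo buffer (the `auditok.signal_numpy` import path is assumed):

import numpy as np

from auditok.signal_numpy import separate_channels  # import path assumed

left = np.array([1, 2, 3], dtype=np.int16)
right = np.array([10, 20, 30], dtype=np.int16)
interleaved = np.column_stack((left, right)).ravel().tobytes()

print(separate_channels(interleaved, np.int16, channels=2))
# [[ 1  2  3]
#  [10 20 30]]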
@@ -1,427 +0,0 @@
import os
import sys
from tempfile import NamedTemporaryFile
from abc import ABCMeta, abstractmethod
from threading import Thread
from datetime import datetime, timedelta
from collections import namedtuple
import wave
import subprocess
from queue import Queue, Empty
from .io import _guess_audio_format
from .util import AudioDataSource, make_duration_formatter
from .core import split
from .exceptions import (
    EndOfProcessing,
    AudioEncodingError,
    AudioEncodingWarning,
)


_STOP_PROCESSING = "STOP_PROCESSING"
_Detection = namedtuple("_Detection", "id start end duration")


def _run_subprocess(command):
    try:
        with subprocess.Popen(
            command,
            stdin=open(os.devnull, "rb"),
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        ) as proc:
            stdout, stderr = proc.communicate()
            return proc.returncode, stdout, stderr
    except Exception:
        err_msg = "Couldn't export audio using command: '{}'".format(command)
        raise AudioEncodingError(err_msg)


class Worker(Thread, metaclass=ABCMeta):
    def __init__(self, timeout=0.5, logger=None):
        self._timeout = timeout
        self._logger = logger
        self._inbox = Queue()
        Thread.__init__(self)

    def run(self):
        while True:
            message = self._get_message()
            if message == _STOP_PROCESSING:
                break
            if message is not None:
                self._process_message(message)
        self._post_process()

    @abstractmethod
    def _process_message(self, message):
        """Process incoming messages"""

    def _post_process(self):
        pass

    def _log(self, message):
        self._logger.info(message)

    def _stop_requested(self):
        try:
            message = self._inbox.get_nowait()
            if message == _STOP_PROCESSING:
                return True
        except Empty:
            return False

    def stop(self):
        self.send(_STOP_PROCESSING)
        self.join()

    def send(self, message):
        self._inbox.put(message)

    def _get_message(self):
        try:
            message = self._inbox.get(timeout=self._timeout)
            return message
        except Empty:
            return None


class TokenizerWorker(Worker, AudioDataSource):
    def __init__(self, reader, observers=None, logger=None, **kwargs):
        self._observers = observers if observers is not None else []
        self._reader = reader
        self._audio_region_gen = split(self, **kwargs)
        self._detections = []
        self._log_format = "[DET]: Detection {0.id} (start: {0.start:.3f}, "
        self._log_format += "end: {0.end:.3f}, duration: {0.duration:.3f})"
        Worker.__init__(self, timeout=0.2, logger=logger)

    def _process_message(self):
        pass

    @property
    def detections(self):
        return self._detections

    def _notify_observers(self, message):
        for observer in self._observers:
            observer.send(message)

    def run(self):
        self._reader.open()
        start_processing_timestamp = datetime.now()
        for _id, audio_region in enumerate(self._audio_region_gen, start=1):
            timestamp = start_processing_timestamp + timedelta(
                seconds=audio_region.meta.start
            )
            audio_region.meta.timestamp = timestamp
            detection = _Detection(
                _id,
                audio_region.meta.start,
                audio_region.meta.end,
                audio_region.duration,
            )
            self._detections.append(detection)
            if self._logger is not None:
                message = self._log_format.format(detection)
                self._log(message)
            self._notify_observers((_id, audio_region))
        self._notify_observers(_STOP_PROCESSING)
        self._reader.close()

    def start_all(self):
        for observer in self._observers:
            observer.start()
        self.start()

    def stop_all(self):
        self.stop()
        for observer in self._observers:
            observer.stop()
        self._reader.close()

    def read(self):
        if self._stop_requested():
            return None
        else:
            return self._reader.read()

    def __getattr__(self, name):
        return getattr(self._reader, name)


class StreamSaverWorker(Worker):
    def __init__(
        self,
        audio_reader,
        filename,
        export_format=None,
        cache_size_sec=0.5,
        timeout=0.2,
    ):
        self._reader = audio_reader
        sample_size_bytes = self._reader.sw * self._reader.ch
        self._cache_size = cache_size_sec * self._reader.sr * sample_size_bytes
        self._output_filename = filename
        self._export_format = _guess_audio_format(export_format, filename)
        if self._export_format is None:
            self._export_format = "wav"
        self._init_output_stream()
        self._exported = False
        self._cache = []
        self._total_cached = 0
        Worker.__init__(self, timeout=timeout)

    def _get_non_existent_filename(self):
        filename = self._output_filename + ".wav"
        i = 0
        while os.path.exists(filename):
            i += 1
            filename = self._output_filename + "({}).wav".format(i)
        return filename

    def _init_output_stream(self):
        if self._export_format != "wav":
            self._tmp_output_filename = self._get_non_existent_filename()
        else:
            self._tmp_output_filename = self._output_filename
        self._wfp = wave.open(self._tmp_output_filename, "wb")
        self._wfp.setframerate(self._reader.sr)
        self._wfp.setsampwidth(self._reader.sw)
        self._wfp.setnchannels(self._reader.ch)

    @property
    def sr(self):
        return self._reader.sampling_rate

    @property
    def sw(self):
        return self._reader.sample_width

    @property
    def ch(self):
        return self._reader.channels

    def __del__(self):
        self._post_process()

        if (
            (self._tmp_output_filename != self._output_filename)
            and self._exported
            and os.path.exists(self._tmp_output_filename)
        ):
            os.remove(self._tmp_output_filename)

    def _process_message(self, data):
        self._cache.append(data)
        self._total_cached += len(data)
        if self._total_cached >= self._cache_size:
            self._write_cached_data()

    def _post_process(self):
        while True:
            try:
                data = self._inbox.get_nowait()
                if data != _STOP_PROCESSING:
                    self._cache.append(data)
                    self._total_cached += len(data)
            except Empty:
                break
        self._write_cached_data()
        self._wfp.close()

    def _write_cached_data(self):
        if self._cache:
            data = b"".join(self._cache)
            self._wfp.writeframes(data)
            self._cache = []
            self._total_cached = 0

    def open(self):
        self._reader.open()

    def close(self):
        self._reader.close()
        self.stop()

    def rewind(self):
        # ensure compatibility with AudioDataSource with record=True
        pass

    @property
    def data(self):
        with wave.open(self._tmp_output_filename, "rb") as wfp:
            return wfp.readframes(-1)

    def save_stream(self):
        if self._exported:
            return self._output_filename

        if self._export_format in ("raw", "wav"):
            if self._export_format == "raw":
                self._export_raw()
            self._exported = True
            return self._output_filename
        try:
            self._export_with_ffmpeg_or_avconv()
        except AudioEncodingError:
            try:
                self._export_with_sox()
            except AudioEncodingError:
                warn_msg = "Couldn't save audio data in the desired format "
                warn_msg += "'{}'. Either none of 'ffmpeg', 'avconv' or 'sox' "
                warn_msg += "is installed or this format is not recognized.\n"
                warn_msg += "Audio file was saved as '{}'"
                raise AudioEncodingWarning(
                    warn_msg.format(
                        self._export_format, self._tmp_output_filename
                    )
                )
        finally:
            self._exported = True
        return self._output_filename

    def _export_raw(self):
        with open(self._output_filename, "wb") as wfp:
            wfp.write(self.data)

    def _export_with_ffmpeg_or_avconv(self):
        command = [
            "-y",
            "-f",
            "wav",
            "-i",
            self._tmp_output_filename,
            "-f",
            self._export_format,
            self._output_filename,
        ]
        returncode, stdout, stderr = _run_subprocess(["ffmpeg"] + command)
        if returncode != 0:
            returncode, stdout, stderr = _run_subprocess(["avconv"] + command)
            if returncode != 0:
                raise AudioEncodingError(stderr)
        return stdout, stderr

    def _export_with_sox(self):
        command = [
            "sox",
            "-t",
            "wav",
            self._tmp_output_filename,
            self._output_filename,
        ]
        returncode, stdout, stderr = _run_subprocess(command)
        if returncode != 0:
            raise AudioEncodingError(stderr)
        return stdout, stderr

    def close_output(self):
        self._wfp.close()

    def read(self):
        data = self._reader.read()
        if data is not None:
            self.send(data)
        else:
            self.send(_STOP_PROCESSING)
        return data

    def __getattr__(self, name):
        if name == "data":
            return self.data
        return getattr(self._reader, name)


class PlayerWorker(Worker):
    def __init__(self, player, progress_bar=False, timeout=0.2, logger=None):
        self._player = player
        self._progress_bar = progress_bar
        self._log_format = "[PLAY]: Detection {id} played"
        Worker.__init__(self, timeout=timeout, logger=logger)

    def _process_message(self, message):
        _id, audio_region = message
        if self._logger is not None:
            message = self._log_format.format(id=_id)
            self._log(message)
        audio_region.play(
            player=self._player, progress_bar=self._progress_bar, leave=False
        )


class RegionSaverWorker(Worker):
    def __init__(
        self,
        filename_format,
        audio_format=None,
        timeout=0.2,
        logger=None,
        **audio_parameters
    ):
        self._filename_format = filename_format
        self._audio_format = audio_format
        self._audio_parameters = audio_parameters
        self._debug_format = "[SAVE]: Detection {id} saved as '{filename}'"
        Worker.__init__(self, timeout=timeout, logger=logger)

    def _process_message(self, message):
        _id, audio_region = message
        filename = self._filename_format.format(
            id=_id,
            start=audio_region.meta.start,
            end=audio_region.meta.end,
            duration=audio_region.duration,
        )
        filename = audio_region.save(
            filename, self._audio_format, **self._audio_parameters
        )
        if self._logger:
            message = self._debug_format.format(id=_id, filename=filename)
            self._log(message)


class CommandLineWorker(Worker):
    def __init__(self, command, timeout=0.2, logger=None):
        self._command = command
        Worker.__init__(self, timeout=timeout, logger=logger)
        self._debug_format = "[COMMAND]: Detection {id} command: '{command}'"

    def _process_message(self, message):
        _id, audio_region = message
        with NamedTemporaryFile(delete=False) as file:
            filename = audio_region.save(file.name, audio_format="wav")
            command = self._command.format(file=filename)
            os.system(command)
            if self._logger is not None:
                message = self._debug_format.format(id=_id, command=command)
                self._log(message)


class PrintWorker(Worker):
    def __init__(
        self,
        print_format="{start} {end}",
        time_format="%S",
        timestamp_format="%Y/%m/%d %H:%M:%S.%f",
        timeout=0.2,
    ):

        self._print_format = print_format
        self._format_time = make_duration_formatter(time_format)
        self._timestamp_format = timestamp_format
        self.detections = []
        Worker.__init__(self, timeout=timeout)

    def _process_message(self, message):
        _id, audio_region = message
        timestamp = audio_region.meta.timestamp
        timestamp = timestamp.strftime(self._timestamp_format)
        text = self._print_format.format(
            id=_id,
            start=self._format_time(audio_region.meta.start),
            end=self._format_time(audio_region.meta.end),
            duration=self._format_time(audio_region.duration),
            timestamp=timestamp,
        )
        print(text)
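Finally, a rough sketch of how these workers were chained together by the command-line front end; the AudioDataSource constructor arguments are assumptions, while the split keywords match those built by make_kwargs earlier in this diff.

# Hypothetical wiring of the worker classes above.
from auditok.util import AudioDataSource  # import path assumed

reader = AudioDataSource("audio.wav", block_dur=0.01)  # constructor args assumed
printer = PrintWorker(print_format="{id} {start} {end}")
tokenizer = TokenizerWorker(
    reader,
    observers=[printer],
    min_dur=0.2,
    max_dur=5,
    max_silence=0.3,
    energy_threshold=55,
)
tokenizer.start_all()   # starts the observers, then the tokenizer thread
tokenizer.join()        # run() ends once the reader is exhausted
tokenizer.stop_all()    # stop observers and close the reader
print(tokenizer.detections)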