parent 63f1e63fa8
commit c23ce4a4d1
@@ -1,126 +0,0 @@
import sys
import logging
from collections import namedtuple
from . import workers
from .util import AudioDataSource
from .io import player_for

_AUDITOK_LOGGER = "AUDITOK_LOGGER"
KeywordArguments = namedtuple(
    "KeywordArguments", ["io", "split", "miscellaneous"]
)


def make_kwargs(args_ns):
    if args_ns.save_stream is None:
        record = args_ns.plot or (args_ns.save_image is not None)
    else:
        record = False
    try:
        use_channel = int(args_ns.use_channel)
    except (ValueError, TypeError):
        use_channel = args_ns.use_channel

    io_kwargs = {
        "input": args_ns.input,
        "audio_format": args_ns.input_format,
        "max_read": args_ns.max_read,
        "block_dur": args_ns.analysis_window,
        "sampling_rate": args_ns.sampling_rate,
        "sample_width": args_ns.sample_width,
        "channels": args_ns.channels,
        "use_channel": use_channel,
        "save_stream": args_ns.save_stream,
        "save_detections_as": args_ns.save_detections_as,
        "export_format": args_ns.output_format,
        "large_file": args_ns.large_file,
        "frames_per_buffer": args_ns.frame_per_buffer,
        "input_device_index": args_ns.input_device_index,
        "record": record,
    }

    split_kwargs = {
        "min_dur": args_ns.min_duration,
        "max_dur": args_ns.max_duration,
        "max_silence": args_ns.max_silence,
        "drop_trailing_silence": args_ns.drop_trailing_silence,
        "strict_min_dur": args_ns.strict_min_duration,
        "energy_threshold": args_ns.energy_threshold,
    }

    miscellaneous = {
        "echo": args_ns.echo,
        "progress_bar": args_ns.progress_bar,
        "command": args_ns.command,
        "quiet": args_ns.quiet,
        "printf": args_ns.printf,
        "time_format": args_ns.time_format,
        "timestamp_format": args_ns.timestamp_format,
    }
    return KeywordArguments(io_kwargs, split_kwargs, miscellaneous)


def make_logger(stderr=False, file=None, name=_AUDITOK_LOGGER):
    if not stderr and file is None:
        return None
    logger = logging.getLogger(name)
    logger.setLevel(logging.INFO)
    if stderr:
        handler = logging.StreamHandler(sys.stderr)
        handler.setLevel(logging.INFO)
        logger.addHandler(handler)

    if file is not None:
        handler = logging.FileHandler(file, "w")
        fmt = logging.Formatter("[%(asctime)s] | %(message)s")
        handler.setFormatter(fmt)
        handler.setLevel(logging.INFO)
        logger.addHandler(handler)
    return logger


def initialize_workers(logger=None, **kwargs):
    observers = []
    reader = AudioDataSource(source=kwargs["input"], **kwargs)
    if kwargs["save_stream"] is not None:
        reader = workers.StreamSaverWorker(
            reader,
            filename=kwargs["save_stream"],
            export_format=kwargs["export_format"],
        )
        reader.start()

    if kwargs["save_detections_as"] is not None:
        worker = workers.RegionSaverWorker(
            kwargs["save_detections_as"],
            kwargs["export_format"],
            logger=logger,
        )
        observers.append(worker)

    if kwargs["echo"]:
        player = player_for(reader)
        worker = workers.PlayerWorker(
            player, progress_bar=kwargs["progress_bar"], logger=logger
        )
        observers.append(worker)

    if kwargs["command"] is not None:
        worker = workers.CommandLineWorker(
            command=kwargs["command"], logger=logger
        )
        observers.append(worker)

    if not kwargs["quiet"]:
        print_format = (
            kwargs["printf"]
            .replace("\\n", "\n")
            .replace("\\t", "\t")
            .replace("\\r", "\r")
        )
        worker = workers.PrintWorker(
            print_format, kwargs["time_format"], kwargs["timestamp_format"]
        )
        observers.append(worker)

    return reader, observers
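Illustration only (not part of the changed files): a minimal sketch of the pattern make_kwargs implements above, regrouping parsed CLI options into per-component keyword-argument buckets carried by a namedtuple. The reduced option set and the make_kwargs_demo name are hypothetical; the real function reads many more attributes, as listed above. Note that initialize_workers expects a merged mapping containing both the io keys and the miscellaneous keys (it reads "input", "save_stream", "echo", "quiet", ...).

# Hypothetical, minimal illustration of the make_kwargs pattern: parsed CLI
# options are regrouped into keyword-argument buckets so each downstream
# component only receives the options it needs.
import argparse
from collections import namedtuple

KeywordArguments = namedtuple("KeywordArguments", ["io", "split", "miscellaneous"])

def make_kwargs_demo(args_ns):
    io_kwargs = {"input": args_ns.input, "sampling_rate": args_ns.sampling_rate}
    split_kwargs = {"min_dur": args_ns.min_duration, "max_silence": args_ns.max_silence}
    miscellaneous = {"quiet": args_ns.quiet}
    return KeywordArguments(io_kwargs, split_kwargs, miscellaneous)

parser = argparse.ArgumentParser()
parser.add_argument("--input", default="audio.wav")
parser.add_argument("--sampling-rate", type=int, default=16000)
parser.add_argument("--min-duration", type=float, default=0.2)
parser.add_argument("--max-silence", type=float, default=0.3)
parser.add_argument("--quiet", action="store_true")
args = parser.parse_args([])  # empty list: use the defaults above

kwargs = make_kwargs_demo(args)
print(kwargs.io, kwargs.split, kwargs.miscellaneous)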
@@ -1,31 +1,19 @@
 """
-This module contains links to audio files that can be used for test purposes.
+This module contains links to audio files you can use for test purposes.
-
-.. autosummary::
-    :toctree: generated/
-
-    one_to_six_arabic_16000_mono_bc_noise
-    was_der_mensch_saet_mono_44100_lead_trail_silence
 """
 
 import os
 
-__all__ = [
-    "one_to_six_arabic_16000_mono_bc_noise",
-    "was_der_mensch_saet_mono_44100_lead_trail_silence",
-]
+__all__ = ["one_to_six_arabic_16000_mono_bc_noise", "was_der_mensch_saet_mono_44100_lead_trail_silence"]
 
 _current_dir = os.path.dirname(os.path.realpath(__file__))
 
 one_to_six_arabic_16000_mono_bc_noise = "{cd}{sep}data{sep}1to6arabic_\
-16000_mono_bc_noise.wav".format(
-    cd=_current_dir, sep=os.path.sep
-)
+16000_mono_bc_noise.wav".format(cd=_current_dir, sep=os.path.sep)
 """A wave file that contains a pronunciation of Arabic numbers from 1 to 6"""
 
 was_der_mensch_saet_mono_44100_lead_trail_silence = "{cd}{sep}data{sep}was_\
 der_mensch_saet_das_wird_er_vielfach_ernten_44100Hz_mono_lead_trail_\
-silence.wav".format(
-    cd=_current_dir, sep=os.path.sep
-)
-"""A wave file that contains a sentence with a long leading and trailing silence"""
+silence.wav".format(cd=_current_dir, sep=os.path.sep)
+""" A wave file that contains a sentence between long leading and trailing periods of silence"""
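Illustration only: a quick way to inspect one of the bundled test files with the standard library. The import path is an assumption (the modified file is not named in this diff); in released auditok versions these constants are exposed by auditok.dataset.

# Sketch: read the header of a bundled test wave file.
import wave

from auditok.dataset import one_to_six_arabic_16000_mono_bc_noise  # assumed import path

with wave.open(one_to_six_arabic_16000_mono_bc_noise) as wfp:
    print(wfp.getframerate(), wfp.getnchannels(), wfp.getsampwidth())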
@@ -1,150 +0,0 @@
import matplotlib.pyplot as plt
import numpy as np

AUDITOK_PLOT_THEME = {
    "figure": {"facecolor": "#482a36", "alpha": 0.2},
    "plot": {"facecolor": "#282a36"},
    "energy_threshold": {
        "color": "#e31f8f",
        "linestyle": "--",
        "linewidth": 1,
    },
    "signal": {"color": "#40d970", "linestyle": "-", "linewidth": 1},
    "detections": {
        "facecolor": "#777777",
        "edgecolor": "#ff8c1a",
        "linewidth": 1,
        "alpha": 0.75,
    },
}


def _make_time_axis(nb_samples, sampling_rate):
    sample_duration = 1 / sampling_rate
    x = np.linspace(0, sample_duration * (nb_samples - 1), nb_samples)
    return x


def _plot_line(x, y, theme, xlabel=None, ylabel=None, **kwargs):
    color = theme.get("color", theme.get("c"))
    ls = theme.get("linestyle", theme.get("ls"))
    lw = theme.get("linewidth", theme.get("lw"))
    plt.plot(x, y, c=color, ls=ls, lw=lw, **kwargs)
    plt.xlabel(xlabel, fontsize=8)
    plt.ylabel(ylabel, fontsize=8)


def _plot_detections(subplot, detections, theme):
    fc = theme.get("facecolor", theme.get("fc"))
    ec = theme.get("edgecolor", theme.get("ec"))
    ls = theme.get("linestyle", theme.get("ls"))
    lw = theme.get("linewidth", theme.get("lw"))
    alpha = theme.get("alpha")
    for (start, end) in detections:
        subplot.axvspan(start, end, fc=fc, ec=ec, ls=ls, lw=lw, alpha=alpha)


def plot(
    audio_region,
    scale_signal=True,
    detections=None,
    energy_threshold=None,
    show=True,
    figsize=None,
    save_as=None,
    dpi=120,
    theme="auditok",
):
    y = np.asarray(audio_region)
    if len(y.shape) == 1:
        y = y.reshape(1, -1)
    nb_subplots, nb_samples = y.shape
    sampling_rate = audio_region.sampling_rate
    time_axis = _make_time_axis(nb_samples, sampling_rate)
    if energy_threshold is not None:
        eth_log10 = energy_threshold * np.log(10) / 10
        amplitude_threshold = np.sqrt(np.exp(eth_log10))
    else:
        amplitude_threshold = None
    if detections is None:
        detections = []
    else:
        # End of detection corresponds to the end of the last sample but
        # to stay compatible with the time axis of signal plotting we want end
        # of detection to correspond to the *start* of that last sample.
        detections = [
            (start, end - (1 / sampling_rate)) for (start, end) in detections
        ]
    if theme == "auditok":
        theme = AUDITOK_PLOT_THEME

    fig = plt.figure(figsize=figsize, dpi=dpi)
    fig_theme = theme.get("figure", theme.get("fig", {}))
    fig_fc = fig_theme.get("facecolor", fig_theme.get("ffc"))
    fig_alpha = fig_theme.get("alpha", 1)
    fig.patch.set_facecolor(fig_fc)
    fig.patch.set_alpha(fig_alpha)

    plot_theme = theme.get("plot", {})
    plot_fc = plot_theme.get("facecolor", plot_theme.get("pfc"))

    if nb_subplots > 2 and nb_subplots % 2 == 0:
        nb_rows = nb_subplots // 2
        nb_columns = 2
    else:
        nb_rows = nb_subplots
        nb_columns = 1

    for sid, samples in enumerate(y, 1):
        ax = fig.add_subplot(nb_rows, nb_columns, sid)
        ax.set_facecolor(plot_fc)
        if scale_signal:
            std = samples.std()
            if std > 0:
                mean = samples.mean()
                std = samples.std()
                samples = (samples - mean) / std
            max_ = samples.max()
            plt.ylim(-1.5 * max_, 1.5 * max_)
        if amplitude_threshold is not None:
            if scale_signal and std > 0:
                amp_th = (amplitude_threshold - mean) / std
            else:
                amp_th = amplitude_threshold
            eth_theme = theme.get("energy_threshold", theme.get("eth", {}))
            _plot_line(
                [time_axis[0], time_axis[-1]],
                [amp_th] * 2,
                eth_theme,
                label="Detection threshold",
            )
            if sid == 1:
                legend = plt.legend(
                    ["Detection threshold"],
                    facecolor=fig_fc,
                    framealpha=0.1,
                    bbox_to_anchor=(0.0, 1.15, 1.0, 0.102),
                    loc=2,
                )
                legend = plt.gca().add_artist(legend)

        signal_theme = theme.get("signal", {})
        _plot_line(
            time_axis,
            samples,
            signal_theme,
            xlabel="Time (seconds)",
            ylabel="Signal{}".format(" (scaled)" if scale_signal else ""),
        )
        detections_theme = theme.get("detections", {})
        _plot_detections(ax, detections, detections_theme)
        plt.title("Channel {}".format(sid), fontsize=10)

    plt.xticks(fontsize=8)
    plt.yticks(fontsize=8)
    plt.tight_layout()

    if save_as is not None:
        plt.savefig(save_as, dpi=dpi)
    if show:
        plt.show()
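A note on the threshold conversion in plot() above: the energy threshold is expressed in dB (energy = 20 * log10(amplitude), matching calculate_energy_single_channel in the next file), and sqrt(exp(E * ln(10) / 10)) is algebraically the same as 10 ** (E / 20), i.e. the raw amplitude at which that energy is reached. A small check, with an example value of my choosing:

# Sketch of the energy-to-amplitude conversion performed in plot() above.
import numpy as np

energy_threshold = 50  # dB, example value
eth_log10 = energy_threshold * np.log(10) / 10
amplitude = np.sqrt(np.exp(eth_log10))
assert np.isclose(amplitude, 10 ** (energy_threshold / 20))
print(amplitude)  # ~316.2 for 50 dB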
@@ -1,179 +0,0 @@
"""
Module for basic audio signal processing and array operations.

.. autosummary::
    :toctree: generated/

    to_array
    extract_single_channel
    compute_average_channel
    compute_average_channel_stereo
    separate_channels
    calculate_energy_single_channel
    calculate_energy_multichannel
"""
from array import array as array_
import audioop
import math

FORMAT = {1: "b", 2: "h", 4: "i"}
_EPSILON = 1e-10


def to_array(data, sample_width, channels):
    """Extract individual channels of audio data and return a list of arrays of
    numeric samples. This will always return a list of `array.array` objects
    (one per channel) even if audio data is mono.

    Parameters
    ----------
    data : bytes
        raw audio data.
    sample_width : int
        size in bytes of one audio sample (one channel considered).

    Returns
    -------
    samples_arrays : list
        list of arrays of audio samples.
    """
    fmt = FORMAT[sample_width]
    if channels == 1:
        return [array_(fmt, data)]
    return separate_channels(data, fmt, channels)


def extract_single_channel(data, fmt, channels, selected):
    samples = array_(fmt, data)
    return samples[selected::channels]


def compute_average_channel(data, fmt, channels):
    """
    Compute and return average channel of multi-channel audio data. If the
    number of channels is 2, use :func:`compute_average_channel_stereo` (much
    faster). This function uses the standard `array` module to convert `bytes`
    data into an array of numeric values.

    Parameters
    ----------
    data : bytes
        multi-channel audio data to mix down.
    fmt : str
        format (single character) to pass to `array.array` to convert `data`
        into an array of samples. This should be "b" if audio data's sample width
        is 1, "h" if it's 2 and "i" if it's 4.
    channels : int
        number of channels of audio data.

    Returns
    -------
    mono_audio : bytes
        mixed down audio data.
    """
    all_channels = array_(fmt, data)
    mono_channels = [
        array_(fmt, all_channels[ch::channels]) for ch in range(channels)
    ]
    avg_arr = array_(
        fmt,
        (round(sum(samples) / channels) for samples in zip(*mono_channels)),
    )
    return avg_arr


def compute_average_channel_stereo(data, sample_width):
    """Compute and return average channel of stereo audio data. This function
    should be used when the number of channels is exactly 2 because in that
    case we can use the standard `audioop` module which is *much* faster than
    calling :func:`compute_average_channel`.

    Parameters
    ----------
    data : bytes
        2-channel audio data to mix down.
    sample_width : int
        size in bytes of one audio sample (one channel considered).

    Returns
    -------
    mono_audio : bytes
        mixed down audio data.
    """
    fmt = FORMAT[sample_width]
    arr = array_(fmt, audioop.tomono(data, sample_width, 0.5, 0.5))
    return arr


def separate_channels(data, fmt, channels):
    """Create a list of arrays of audio samples (`array.array` objects), one for
    each channel.

    Parameters
    ----------
    data : bytes
        multi-channel audio data to mix down.
    fmt : str
        format (single character) to pass to `array.array` to convert `data`
        into an array of samples. This should be "b" if audio data's sample width
        is 1, "h" if it's 2 and "i" if it's 4.
    channels : int
        number of channels of audio data.

    Returns
    -------
    channels_arr : list
        list of audio channels, each as a standard `array.array`.
    """
    all_channels = array_(fmt, data)
    mono_channels = [
        array_(fmt, all_channels[ch::channels]) for ch in range(channels)
    ]
    return mono_channels


def calculate_energy_single_channel(data, sample_width):
    """Calculate the energy of mono audio data. Energy is computed as:

    .. math:: energy = 20 \log(\sqrt({1}/{N}\sum_{i}^{N}{a_i}^2)) % # noqa: W605

    where `a_i` is the i-th audio sample and `N` is the number of audio samples
    in data.

    Parameters
    ----------
    data : bytes
        single-channel audio data.
    sample_width : int
        size in bytes of one audio sample.

    Returns
    -------
    energy : float
        energy of audio signal.
    """
    energy_sqrt = max(audioop.rms(data, sample_width), _EPSILON)
    return 20 * math.log10(energy_sqrt)


def calculate_energy_multichannel(x, sample_width, aggregation_fn=max):
    """Calculate the energy of multi-channel audio data. Energy is calculated
    channel-wise. An aggregation function is applied to the resulting energies
    (default: `max`). Also see :func:`calculate_energy_single_channel`.

    Parameters
    ----------
    data : bytes
        single-channel audio data.
    sample_width : int
        size in bytes of one audio sample (one channel considered).
    aggregation_fn : callable, default: max
        aggregation function to apply to the resulting per-channel energies.

    Returns
    -------
    energy : float
        aggregated energy of multi-channel audio signal.
    """
    energies = (calculate_energy_single_channel(xi, sample_width) for xi in x)
    return aggregation_fn(energies)
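Illustration only: the energy computed by calculate_energy_single_channel above is 20 * log10 of the RMS amplitude reported by the standard audioop module, clamped by _EPSILON to avoid log10(0). A short, self-contained check on made-up 16-bit mono samples:

# Sketch: recompute the energy formula by hand on synthetic samples.
# audioop is the stdlib module used by the code above (deprecated in Python 3.11+).
import audioop
import math
from array import array

samples = array("h", [0, 1000, -1000, 2000, -2000, 500])  # 16-bit mono samples
data = samples.tobytes()
rms = audioop.rms(data, 2)  # sample_width = 2 bytes
energy = 20 * math.log10(max(rms, 1e-10))
print(rms, round(energy, 2))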
@@ -1,30 +0,0 @@
import numpy as np
from .signal import (
    compute_average_channel_stereo,
    calculate_energy_single_channel,
    calculate_energy_multichannel,
)

FORMAT = {1: np.int8, 2: np.int16, 4: np.int32}


def to_array(data, sample_width, channels):
    fmt = FORMAT[sample_width]
    if channels == 1:
        return np.frombuffer(data, dtype=fmt).astype(np.float64)
    return separate_channels(data, fmt, channels).astype(np.float64)


def extract_single_channel(data, fmt, channels, selected):
    samples = np.frombuffer(data, dtype=fmt)
    return np.asanyarray(samples[selected::channels], order="C")


def compute_average_channel(data, fmt, channels):
    array = np.frombuffer(data, dtype=fmt).astype(np.float64)
    return array.reshape(-1, channels).mean(axis=1).round().astype(fmt)


def separate_channels(data, fmt, channels):
    array = np.frombuffer(data, dtype=fmt)
    return np.asanyarray(array.reshape(-1, channels).T, order="C")
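Illustration only: the numpy-based separate_channels above deinterleaves frames the same way as the array-based version in the previous file. A quick equivalence check on made-up interleaved stereo samples:

# Sketch: both implementations split interleaved L/R frames into per-channel rows.
import numpy as np
from array import array

interleaved = array("h", [1, 10, 2, 20, 3, 30])  # L, R, L, R, L, R
data = interleaved.tobytes()

channels_np = np.frombuffer(data, dtype=np.int16).reshape(-1, 2).T
channels_arr = [array("h", array("h", data)[ch::2]) for ch in range(2)]

assert channels_np.tolist() == [list(c) for c in channels_arr]
print(channels_np.tolist())  # [[1, 2, 3], [10, 20, 30]]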
@@ -1,427 +0,0 @@
import os
import sys
from tempfile import NamedTemporaryFile
from abc import ABCMeta, abstractmethod
from threading import Thread
from datetime import datetime, timedelta
from collections import namedtuple
import wave
import subprocess
from queue import Queue, Empty
from .io import _guess_audio_format
from .util import AudioDataSource, make_duration_formatter
from .core import split
from .exceptions import (
    EndOfProcessing,
    AudioEncodingError,
    AudioEncodingWarning,
)


_STOP_PROCESSING = "STOP_PROCESSING"
_Detection = namedtuple("_Detection", "id start end duration")


def _run_subprocess(command):
    try:
        with subprocess.Popen(
            command,
            stdin=open(os.devnull, "rb"),
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        ) as proc:
            stdout, stderr = proc.communicate()
            return proc.returncode, stdout, stderr
    except Exception:
        err_msg = "Couldn't export audio using command: '{}'".format(command)
        raise AudioEncodingError(err_msg)


class Worker(Thread, metaclass=ABCMeta):
    def __init__(self, timeout=0.5, logger=None):
        self._timeout = timeout
        self._logger = logger
        self._inbox = Queue()
        Thread.__init__(self)

    def run(self):
        while True:
            message = self._get_message()
            if message == _STOP_PROCESSING:
                break
            if message is not None:
                self._process_message(message)
        self._post_process()

    @abstractmethod
    def _process_message(self, message):
        """Process incoming messages"""

    def _post_process(self):
        pass

    def _log(self, message):
        self._logger.info(message)

    def _stop_requested(self):
        try:
            message = self._inbox.get_nowait()
            if message == _STOP_PROCESSING:
                return True
        except Empty:
            return False

    def stop(self):
        self.send(_STOP_PROCESSING)
        self.join()

    def send(self, message):
        self._inbox.put(message)

    def _get_message(self):
        try:
            message = self._inbox.get(timeout=self._timeout)
            return message
        except Empty:
            return None


class TokenizerWorker(Worker, AudioDataSource):
    def __init__(self, reader, observers=None, logger=None, **kwargs):
        self._observers = observers if observers is not None else []
        self._reader = reader
        self._audio_region_gen = split(self, **kwargs)
        self._detections = []
        self._log_format = "[DET]: Detection {0.id} (start: {0.start:.3f}, "
        self._log_format += "end: {0.end:.3f}, duration: {0.duration:.3f})"
        Worker.__init__(self, timeout=0.2, logger=logger)

    def _process_message(self):
        pass

    @property
    def detections(self):
        return self._detections

    def _notify_observers(self, message):
        for observer in self._observers:
            observer.send(message)

    def run(self):
        self._reader.open()
        start_processing_timestamp = datetime.now()
        for _id, audio_region in enumerate(self._audio_region_gen, start=1):
            timestamp = start_processing_timestamp + timedelta(
                seconds=audio_region.meta.start
            )
            audio_region.meta.timestamp = timestamp
            detection = _Detection(
                _id,
                audio_region.meta.start,
                audio_region.meta.end,
                audio_region.duration,
            )
            self._detections.append(detection)
            if self._logger is not None:
                message = self._log_format.format(detection)
                self._log(message)
            self._notify_observers((_id, audio_region))
        self._notify_observers(_STOP_PROCESSING)
        self._reader.close()

    def start_all(self):
        for observer in self._observers:
            observer.start()
        self.start()

    def stop_all(self):
        self.stop()
        for observer in self._observers:
            observer.stop()
        self._reader.close()

    def read(self):
        if self._stop_requested():
            return None
        else:
            return self._reader.read()

    def __getattr__(self, name):
        return getattr(self._reader, name)


class StreamSaverWorker(Worker):
    def __init__(
        self,
        audio_reader,
        filename,
        export_format=None,
        cache_size_sec=0.5,
        timeout=0.2,
    ):
        self._reader = audio_reader
        sample_size_bytes = self._reader.sw * self._reader.ch
        self._cache_size = cache_size_sec * self._reader.sr * sample_size_bytes
        self._output_filename = filename
        self._export_format = _guess_audio_format(export_format, filename)
        if self._export_format is None:
            self._export_format = "wav"
        self._init_output_stream()
        self._exported = False
        self._cache = []
        self._total_cached = 0
        Worker.__init__(self, timeout=timeout)

    def _get_non_existent_filename(self):
        filename = self._output_filename + ".wav"
        i = 0
        while os.path.exists(filename):
            i += 1
            filename = self._output_filename + "({}).wav".format(i)
        return filename

    def _init_output_stream(self):
        if self._export_format != "wav":
            self._tmp_output_filename = self._get_non_existent_filename()
        else:
            self._tmp_output_filename = self._output_filename
        self._wfp = wave.open(self._tmp_output_filename, "wb")
        self._wfp.setframerate(self._reader.sr)
        self._wfp.setsampwidth(self._reader.sw)
        self._wfp.setnchannels(self._reader.ch)

    @property
    def sr(self):
        return self._reader.sampling_rate

    @property
    def sw(self):
        return self._reader.sample_width

    @property
    def ch(self):
        return self._reader.channels

    def __del__(self):
        self._post_process()

        if (
            (self._tmp_output_filename != self._output_filename)
            and self._exported
            and os.path.exists(self._tmp_output_filename)
        ):
            os.remove(self._tmp_output_filename)

    def _process_message(self, data):
        self._cache.append(data)
        self._total_cached += len(data)
        if self._total_cached >= self._cache_size:
            self._write_cached_data()

    def _post_process(self):
        while True:
            try:
                data = self._inbox.get_nowait()
                if data != _STOP_PROCESSING:
                    self._cache.append(data)
                    self._total_cached += len(data)
            except Empty:
                break
        self._write_cached_data()
        self._wfp.close()

    def _write_cached_data(self):
        if self._cache:
            data = b"".join(self._cache)
            self._wfp.writeframes(data)
            self._cache = []
            self._total_cached = 0

    def open(self):
        self._reader.open()

    def close(self):
        self._reader.close()
        self.stop()

    def rewind(self):
        # ensure compatibility with AudioDataSource with record=True
        pass

    @property
    def data(self):
        with wave.open(self._tmp_output_filename, "rb") as wfp:
            return wfp.readframes(-1)

    def save_stream(self):
        if self._exported:
            return self._output_filename

        if self._export_format in ("raw", "wav"):
            if self._export_format == "raw":
                self._export_raw()
            self._exported = True
            return self._output_filename
        try:
            self._export_with_ffmpeg_or_avconv()
        except AudioEncodingError:
            try:
                self._export_with_sox()
            except AudioEncodingError:
                warn_msg = "Couldn't save audio data in the desired format "
                warn_msg += "'{}'. Either none of 'ffmpeg', 'avconv' or 'sox' "
                warn_msg += "is installed or this format is not recognized.\n"
                warn_msg += "Audio file was saved as '{}'"
                raise AudioEncodingWarning(
                    warn_msg.format(
                        self._export_format, self._tmp_output_filename
                    )
                )
        finally:
            self._exported = True
        return self._output_filename

    def _export_raw(self):
        with open(self._output_filename, "wb") as wfp:
            wfp.write(self.data)

    def _export_with_ffmpeg_or_avconv(self):
        command = [
            "-y",
            "-f",
            "wav",
            "-i",
            self._tmp_output_filename,
            "-f",
            self._export_format,
            self._output_filename,
        ]
        returncode, stdout, stderr = _run_subprocess(["ffmpeg"] + command)
        if returncode != 0:
            returncode, stdout, stderr = _run_subprocess(["avconv"] + command)
            if returncode != 0:
                raise AudioEncodingError(stderr)
        return stdout, stderr

    def _export_with_sox(self):
        command = [
            "sox",
            "-t",
            "wav",
            self._tmp_output_filename,
            self._output_filename,
        ]
        returncode, stdout, stderr = _run_subprocess(command)
        if returncode != 0:
            raise AudioEncodingError(stderr)
        return stdout, stderr

    def close_output(self):
        self._wfp.close()

    def read(self):
        data = self._reader.read()
        if data is not None:
            self.send(data)
        else:
            self.send(_STOP_PROCESSING)
        return data

    def __getattr__(self, name):
        if name == "data":
            return self.data
        return getattr(self._reader, name)


class PlayerWorker(Worker):
    def __init__(self, player, progress_bar=False, timeout=0.2, logger=None):
        self._player = player
        self._progress_bar = progress_bar
        self._log_format = "[PLAY]: Detection {id} played"
        Worker.__init__(self, timeout=timeout, logger=logger)

    def _process_message(self, message):
        _id, audio_region = message
        if self._logger is not None:
            message = self._log_format.format(id=_id)
            self._log(message)
        audio_region.play(
            player=self._player, progress_bar=self._progress_bar, leave=False
        )


class RegionSaverWorker(Worker):
    def __init__(
        self,
        filename_format,
        audio_format=None,
        timeout=0.2,
        logger=None,
        **audio_parameters
    ):
        self._filename_format = filename_format
        self._audio_format = audio_format
        self._audio_parameters = audio_parameters
        self._debug_format = "[SAVE]: Detection {id} saved as '{filename}'"
        Worker.__init__(self, timeout=timeout, logger=logger)

    def _process_message(self, message):
        _id, audio_region = message
        filename = self._filename_format.format(
            id=_id,
            start=audio_region.meta.start,
            end=audio_region.meta.end,
            duration=audio_region.duration,
        )
        filename = audio_region.save(
            filename, self._audio_format, **self._audio_parameters
        )
        if self._logger:
            message = self._debug_format.format(id=_id, filename=filename)
            self._log(message)


class CommandLineWorker(Worker):
    def __init__(self, command, timeout=0.2, logger=None):
        self._command = command
        Worker.__init__(self, timeout=timeout, logger=logger)
        self._debug_format = "[COMMAND]: Detection {id} command: '{command}'"

    def _process_message(self, message):
        _id, audio_region = message
        with NamedTemporaryFile(delete=False) as file:
            filename = audio_region.save(file.name, audio_format="wav")
            command = self._command.format(file=filename)
            os.system(command)
            if self._logger is not None:
                message = self._debug_format.format(id=_id, command=command)
                self._log(message)


class PrintWorker(Worker):
    def __init__(
        self,
        print_format="{start} {end}",
        time_format="%S",
        timestamp_format="%Y/%m/%d %H:%M:%S.%f",
        timeout=0.2,
    ):

        self._print_format = print_format
        self._format_time = make_duration_formatter(time_format)
        self._timestamp_format = timestamp_format
        self.detections = []
        Worker.__init__(self, timeout=timeout)

    def _process_message(self, message):
        _id, audio_region = message
        timestamp = audio_region.meta.timestamp
        timestamp = timestamp.strftime(self._timestamp_format)
        text = self._print_format.format(
            id=_id,
            start=self._format_time(audio_region.meta.start),
            end=self._format_time(audio_region.meta.end),
            duration=self._format_time(audio_region.duration),
            timestamp=timestamp,
        )
        print(text)
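Illustration only: the Worker base class above is a standard consumer-thread pattern, a thread draining a Queue until a stop sentinel arrives, and the observer workers (player, saver, command, print) all receive (id, audio_region) messages through it. A self-contained sketch of that pattern, independent of auditok's classes:

# Sketch: consumer thread with an inbox Queue and a stop sentinel, mirroring
# the send()/stop()/run() structure of the Worker base class above.
from queue import Queue, Empty
from threading import Thread

_STOP = "STOP_PROCESSING"

class EchoWorker(Thread):
    def __init__(self, timeout=0.2):
        super().__init__()
        self._inbox = Queue()
        self._timeout = timeout

    def send(self, message):
        self._inbox.put(message)

    def stop(self):
        self.send(_STOP)
        self.join()

    def run(self):
        while True:
            try:
                message = self._inbox.get(timeout=self._timeout)
            except Empty:
                continue
            if message == _STOP:
                break
            print("processing:", message)

worker = EchoWorker()
worker.start()
for item in ("detection 1", "detection 2"):
    worker.send(item)
worker.stop()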