You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
180 lines
5.1 KiB
180 lines
5.1 KiB
"""
|
|
Module for basic audio signal processing and array operations.
|
|
|
|
.. autosummary::
|
|
:toctree: generated/
|
|
|
|
to_array
|
|
extract_single_channel
|
|
compute_average_channel
|
|
compute_average_channel_stereo
|
|
separate_channels
|
|
calculate_energy_single_channel
|
|
calculate_energy_multichannel
|
|
"""
|
|
from array import array as array_
|
|
import audioop
|
|
import math
|
|
|
|
# Maps a sample width in bytes to the `array.array` type code used to decode
# raw audio data of that width.
FORMAT = {1: "b", 2: "h", 4: "i"}
|
|
# Lower bound applied to the RMS value before its logarithm is taken, so that
# an RMS of 0 (silence) does not make `math.log10` fail.
_EPSILON = 1e-10
|
|
|
|
|
|
def to_array(data, sample_width, channels):
    """Convert raw audio bytes into per-channel arrays of numeric samples.

    The result is always a list of `array.array` objects, one per channel,
    including when audio data is mono (the list then has a single item).

    Parameters
    ----------
    data : bytes
        raw audio data.
    sample_width : int
        size in bytes of one audio sample (one channel considered). Must be
        one of the keys of `FORMAT`.
    channels : int
        number of channels of audio data.

    Returns
    -------
    samples_arrays : list
        list of arrays of audio samples, one array per channel.
    """
    typecode = FORMAT[sample_width]
    if channels != 1:
        # More than one channel: samples must be split channel by channel.
        return separate_channels(data, typecode, channels)
    return [array_(typecode, data)]
|
|
|
|
|
|
def extract_single_channel(data, fmt, channels, selected):
    """Return the samples of one channel of multi-channel audio data.

    Parameters
    ----------
    data : bytes
        raw audio data in which the samples of all channels are interleaved.
    fmt : str
        format (single character) to pass to `array.array` to convert `data`
        into an array of samples.
    channels : int
        number of channels of audio data.
    selected : int
        index of the channel to extract.

    Returns
    -------
    samples : array.array
        samples of the selected channel.
    """
    # Every `channels`-th sample, starting at `selected`, belongs to the
    # requested channel.
    channel_slice = slice(selected, None, channels)
    return array_(fmt, data)[channel_slice]
|
|
|
|
|
|
def compute_average_channel(data, fmt, channels):
    """
    Compute and return average channel of multi-channel audio data. If the
    number of channels is 2, use :func:`compute_average_channel_stereo` (much
    faster). This function uses standard `array` module to convert `bytes` data
    into an array of numeric values.

    Parameters
    ----------
    data : bytes
        multi-channel audio data to mix down.
    fmt : str
        format (single character) to pass to `array.array` to convert `data`
        into an array of samples. This should be "b" if audio data's sample width
        is 1, "h" if it's 2 and "i" if it's 4.
    channels : int
        number of channels of audio data.

    Returns
    -------
    mono_audio : array.array
        mixed down audio samples, with the same type code as `fmt`. Each
        value is the rounded mean of the samples of one frame; a trailing
        incomplete frame is ignored.
    """
    all_samples = array_(fmt, data)
    # Slicing an `array.array` already yields a new array of the same type
    # code, so the slices are used as is instead of being copied again.
    per_channel = (all_samples[ch::channels] for ch in range(channels))
    return array_(
        fmt,
        (round(sum(frame) / channels) for frame in zip(*per_channel)),
    )
|
|
|
|
|
|
def compute_average_channel_stereo(data, sample_width):
    """Compute and return average channel of stereo audio data.

    Use this function when the number of channels is exactly 2: the mix
    down is then delegated to the standard `audioop` module, which is *much*
    faster than calling :func:`compute_average_channel`.

    Parameters
    ----------
    data : bytes
        2-channel audio data to mix down.
    sample_width : int
        size in bytes of one audio sample (one channel considered). Must be
        one of the keys of `FORMAT`.

    Returns
    -------
    mono_audio : array.array
        mixed down audio samples.
    """
    typecode = FORMAT[sample_width]
    # Both channels contribute equally (factor 0.5 each) to the mono signal.
    mixed_down = audioop.tomono(data, sample_width, 0.5, 0.5)
    return array_(typecode, mixed_down)
|
|
|
|
|
|
def separate_channels(data, fmt, channels):
    """Create a list of arrays of audio samples (`array.array` objects), one for
    each channel.

    Parameters
    ----------
    data : bytes
        multi-channel audio data to split into channels.
    fmt : str
        format (single character) to pass to `array.array` to convert `data`
        into an array of samples. This should be "b" if audio data's sample width
        is 1, "h" if it's 2 and "i" if it's 4.
    channels : int
        number of channels of audio data.

    Returns
    -------
    channels_arr : list
        list of audio channels, each as a standard `array.array`.
    """
    all_samples = array_(fmt, data)
    # A slice of an `array.array` is itself a new, independent array with the
    # same type code, so no extra copy is needed.
    return [all_samples[ch::channels] for ch in range(channels)]
|
|
|
|
|
|
def calculate_energy_single_channel(data, sample_width):
    r"""Calculate the energy of mono audio data. Energy is computed as:

    .. math:: energy = 20 \log_{10}(\sqrt{{1}/{N}\sum_{i=1}^{N}{a_i}^2})

    where `a_i` is the i-th audio sample and `N` is the number of audio samples
    in data.

    Parameters
    ----------
    data : bytes
        single-channel audio data.
    sample_width : int
        size in bytes of one audio sample.

    Returns
    -------
    energy : float
        energy of audio signal.
    """
    # The RMS is floored at `_EPSILON` so that the logarithm stays defined
    # when the RMS is 0.
    rms = max(audioop.rms(data, sample_width), _EPSILON)
    return 20 * math.log10(rms)
|
|
|
|
|
|
def calculate_energy_multichannel(x, sample_width, aggregation_fn=max):
    """Calculate the energy of multi-channel audio data. Energy is calculated
    channel-wise. An aggregation function is applied to the resulting energies
    (default: `max`). Also see :func:`calculate_energy_single_channel`.

    Parameters
    ----------
    x : iterable
        audio data of each channel; every item is passed as is to
        :func:`calculate_energy_single_channel`.
    sample_width : int
        size in bytes of one audio sample (one channel considered).
    aggregation_fn : callable, default: max
        aggregation function to apply to the resulting per-channel energies.
        It is called with a list of floats, one per channel.

    Returns
    -------
    energy : float
        aggregated energy of multi-channel audio signal.
    """
    # A list (rather than a one-shot generator) lets aggregation functions
    # that need `len()` or several passes over the energies work as well.
    energies = [calculate_energy_single_channel(xi, sample_width) for xi in x]
    return aggregation_fn(energies)
|