Source code for pydiogment.auga

"""
- Description: amplitude-based augmentation techniques/manipulations for audio data.
"""
import os
import numpy as np
from .utils.io import read_file, write_file


def apply_gain(infile, gain):
    """
    Apply gain to infile.

    The signal is scaled by the linear amplitude factor matching ``gain`` dB,
    hard-clipped to [-1.0, 1.0], divided by its mean absolute value and then
    handed to ``write_file`` with the name attribute
    ``_augmented_with_<gain>_gain.wav``.

    Args:
        - infile (str) : input filename/path.
        - gain (float) : gain in dB (both positive and negative).
    """
    # read input file
    fs, x = read_file(filename=infile)

    # apply gain: samples are amplitudes, so the dB -> linear conversion is
    # 10**(dB / 20); 10**(dB / 10) is the power ratio and doubles the gain.
    # The multiplication allocates a new array, so the input is not mutated.
    x = np.asarray(x) * (10 ** (gain / 20.0))

    # hard-clip to the [-1.0, 1.0] range
    x = np.clip(x, -1.0, 1.0)

    # rescale by the mean absolute value; skipped for an all-zero (or empty)
    # signal, where the division would only produce NaN/inf samples
    mean_abs = np.mean(np.abs(x))
    if mean_abs > 0:
        x = x / mean_abs

    # export data to file
    output_file_path = os.path.dirname(infile)
    name_attribute = "_augmented_with_%s_gain.wav" % str(gain)
    write_file(output_file_path=output_file_path,
               input_file_name=infile,
               name_attribute=name_attribute,
               sig=x,
               fs=fs)
def add_noise(infile, snr):
    """
    Augment data using noise injection.

    Note:
        It simply adds Gaussian random values to the input file data, scaled
        so that the signal-to-noise power ratio matches ``snr``.

    Args:
        - infile (str) : input filename/path.
        - snr    (int) : signal to noise ratio in dB.
    """
    # read input file
    fs, sig = read_file(filename=infile)

    # work in float64 so that squaring integer samples cannot overflow
    sig = np.asarray(sig, dtype=np.float64)

    # draw noise with the same shape as the signal, so data with more than
    # one dimension (e.g. frames x channels) is handled as well
    noise = np.random.randn(*sig.shape)

    # compute powers
    noise_power = np.mean(np.power(noise, 2))
    sig_power = np.mean(np.power(sig, 2))

    # compute snr and scaling factor
    snr_linear = 10 ** (snr / 10.0)
    noise_factor = (sig_power / noise_power) * (1 / snr_linear)

    # add noise (noise_factor is a power ratio, hence the square root)
    y = sig + np.sqrt(noise_factor) * noise

    # construct file names
    output_file_path = os.path.dirname(infile)
    name_attribute = "_augmented_%s_noisy.wav" % snr

    # export data to file
    write_file(output_file_path=output_file_path,
               input_file_name=infile,
               name_attribute=name_attribute,
               sig=y,
               fs=fs)
def fade_in_and_out(infile):
    """
    Add a fade in and out effect to the audio file.

    The signal is multiplied by a Hamming window spanning its full length,
    divided by its mean absolute value and handed to ``write_file`` with the
    name attribute ``_augmented_fade_in_out.wav``.

    Args:
        - infile (str) : input filename/path.
    """
    # read input file
    fs, sig = read_file(filename=infile)
    sig = np.asarray(sig)

    # construct file names
    output_file_path = os.path.dirname(infile)
    name_attribute = "_augmented_fade_in_out.wav"

    # fade in and out: one window over the first axis, reshaped so that it
    # broadcasts over any trailing axes (e.g. channels)
    window = np.hamming(len(sig))
    window = window.reshape((-1,) + (1,) * (sig.ndim - 1))
    augmented_sig = window * sig

    # rescale by the mean absolute value; skipped for an all-zero (or empty)
    # signal, where the division would only produce NaN/inf samples
    mean_abs = np.mean(np.abs(augmented_sig))
    if mean_abs > 0:
        augmented_sig = augmented_sig / mean_abs

    # export data to file
    write_file(output_file_path=output_file_path,
               input_file_name=infile,
               name_attribute=name_attribute,
               sig=augmented_sig,
               fs=fs)
def normalize(infile, normalization_technique="peak", rms_level=0):
    """
    Normalize the signal given a certain technique (peak or rms).

    Args:
        - infile                  (str) : input filename/path.
        - normalization_technique (str) : type of normalization technique to use.
                                          (default is peak)
        - rms_level               (int) : rms level in dB.

    Raises:
        ValueError: if ``normalization_technique`` is neither "peak" nor "rms".
    """
    # fail early and explicitly; printing and carrying on would only crash
    # further down on the undefined result
    if normalization_technique not in ("peak", "rms"):
        raise ValueError("ParameterError: Unknown normalization_technique variable.")

    # read input file
    fs, sig = read_file(filename=infile)
    sig = np.asarray(sig)

    # integer samples would overflow in abs() / squaring; the result is
    # floating point in either case, so promote them up front
    if np.issubdtype(sig.dtype, np.integer):
        sig = sig.astype(np.float64)

    # normalize signal
    if normalization_technique == "peak":
        # the peak is the largest magnitude, which may be a negative sample
        peak = np.max(np.abs(sig))
        # an all-zero signal has nothing to scale; leave it untouched
        y = sig / peak if peak > 0 else sig
    else:
        # linear rms level and scaling factor
        # NOTE(review): rms_level is converted with /10 (power-ratio
        # convention); if it is meant as an amplitude level in dB, /20 would
        # be the usual conversion. Left unchanged -- confirm intended meaning.
        r = 10 ** (rms_level / 10.0)
        energy = np.sum(sig ** 2)
        if energy > 0:
            # a = r / rms(sig), with the rms taken over every sample
            a = np.sqrt((sig.size * r ** 2) / energy)
            y = sig * a
        else:
            # an all-zero signal cannot be scaled to a target rms
            y = sig

    # construct file names
    output_file_path = os.path.dirname(infile)
    name_attribute = "_augmented_{}_normalized.wav".format(normalization_technique)

    # export data to file
    write_file(output_file_path=output_file_path,
               input_file_name=infile,
               name_attribute=name_attribute,
               sig=y,
               fs=fs)