Source code for pydiogment.augt

"""
- Description: time based augmentation techniques/manipulations for audio data.
"""
import os
import math
import random
import warnings
import subprocess
import numpy as np
from .utils.io import read_file, write_file


[docs]def eliminate_silence(infile): """ Eliminate silence from voice file using ffmpeg library. Args: - infile (str) : Path to get the original voice file from. Returns: list including True for successful authentication, False otherwise and a percentage value representing the certainty of the decision. """ # define output name if none specified output_path = infile.split(".wav")[0] + "_augmented_without_silence.wav" # filter silence in wav remove_silence_command = ["ffmpeg", "-i", infile, "-af", "silenceremove=stop_periods=-1:stop_duration=0.25:stop_threshold=-36dB", "-acodec", "pcm_s16le", "-ac", "1", output_path] out = subprocess.Popen(remove_silence_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) out.wait() with_silence_duration = os.popen( "ffprobe -i '" + infile + "' -show_format -v quiet | sed -n 's/duration=//p'").read() no_silence_duration = os.popen( "ffprobe -i '" + output_path + "' -show_format -v quiet | sed -n 's/duration=//p'").read() return with_silence_duration, no_silence_duration
[docs]def random_cropping(infile, min_len=1): """ Crop the infile with an input minimum duration. Args: - infile (str) : Input filename. - min_len (float) : Minimum duration for randomly cropped excerpt """ fs, x = read_file(filename=infile) t_end = x.size / fs if (t_end > min_len): # get start and end time start = random.uniform(0.0, t_end - min_len) end = random.uniform(start + min_len, t_end) # crop data y = x[int(math.floor(start * fs)):int(math.ceil(end * fs))] # construct file names output_file_path = os.path.dirname(infile) name_attribute = "_augmented_randomly_cropped_%s.wav" % str(min_len) # export data to file write_file(output_file_path=output_file_path, input_file_name=infile, name_attribute=name_attribute, sig=y, fs=fs) else: warning_msg = """ min_len provided is greater than the duration of the song. """ warnings.warn(warning_msg)
[docs]def slow_down(input_file, coefficient=0.8): """ Slow or stretch a wave. Args: - infile (str) : Input filename. - coefficient (float) : coefficient caracterising the slowing degree. """ # set-up variables for paths and file names name_attribute = "_augmented_slowed.wav" output_file = input_file.split(".wav")[0] + name_attribute # apply slowing command slowing_command = ["ffmpeg", "-i", input_file, "-filter:a", "atempo={0}".format(str(coefficient)), output_file] print(" ".join(slowing_command)) p = subprocess.Popen(slowing_command, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) output, error = p.communicate() print(output, error.decode("utf-8") ) # for i in error.decode("utf-8") : print(i) print("Writing data to " + output_file + ".")
[docs]def speed(input_file, coefficient=1.25): """ Speed or shrink a wave. Args: - infile (str) : Input filename. - coefficient (float) : coefficient caracterising the speeding degree. """ # set-up variables for paths and file names name_attribute = "_augmented_speeded.wav" output_file = input_file.split(".wav")[0] + name_attribute # apply slowing command speeding_command = ["ffmpeg", "-i", input_file, "-filter:a", "atempo={0}".format(str(coefficient)), output_file] _ = subprocess.Popen(speeding_command, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) print("Writing data to " + output_file + ".")
[docs]def shift_time(infile, tshift, direction): """ Augment audio data by shifting the time in the file. Signal can be shifted to the left or right. Note: Time shifting is simply moving the audio to left/right with a random second. If shifting audio to left (fast forward) with x seconds, first x seconds will mark as 0 (i.e. silence). If shifting audio to right (back forward) with x seconds, last x seconds will mark as 0 (i.e. silence). Args: - infile (str) : Input filename. - tshift (int) : Signal time shift in seconds. - direction (str) : shift direction (to the left or right). """ fs, sig = read_file(filename=infile) shift = int(tshift * fs) * int(direction == "left") - \ int(tshift * fs) * int(direction == "right") # shift time augmented_sig = np.roll(sig, shift) # construct file names output_file_path = os.path.dirname(infile) name_attribute = "_augmented_%s_%s_shifted.wav" % (direction, tshift) # export data to file write_file(output_file_path=output_file_path, input_file_name=infile, name_attribute=name_attribute, sig=augmented_sig, fs=fs)
[docs]def reverse(infile): """ Inverses the input signal to play from the end to the beginning and writes it to an output file Args: - infile (str): Input filename. """ fs, sig = read_file(filename=infile) augmented_sig = sig[::-1] # construct file names output_file_path = os.path.dirname(infile) name_attribute = "_augmented_reversed.wav" # export data to file write_file(output_file_path=output_file_path, input_file_name=infile, name_attribute=name_attribute, sig=augmented_sig, fs=fs)
[docs]def resample_audio(infile, sr): """ Resample the signal according a new input sampling rate with respect to the Nyquist-Shannon theorem. Args: - infile (str) : input filename/path. - sr (int) : new sampling rate. """ # set-up variables for paths and file names output_file = "{0}_augmented_resampled_to_{1}.wav".format(infile.split(".wav")[0], sr) # apply slowing command sampling_command = ["ffmpeg", "-i", infile, "-ar", str(sr), output_file] print(" ".join(sampling_command)) _ = subprocess.Popen(sampling_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)