"""Build FFT-magnitude feature rows from the WAV files in ``piano/wavs``.

Every file whose name contains ``.wav`` is loaded with pydub, mixed down to
one channel and Fourier-transformed.  The magnitudes of every second bin
among the first 6000 positive-frequency bins are written as one CSV row.

Files whose name contains ``C3`` go to ``output_piano_note_present.csv`` with
a matching ``1`` row in ``labels_piano_note_present.csv``; all other files go
to ``output_piano_note_not_present.csv`` with a ``0`` row in
``labels_piano_note_not_present.csv``.

The script is written to run unchanged on both Python 2.7 and Python 3.
"""
import csv
import glob
import os
import sys

import numpy as np
# Originally `from scipy import arange`: that was a deprecated re-export of
# numpy.arange which newer SciPy releases no longer provide.
from numpy import arange
from pydub import AudioSegment, scipy_effects
from python_speech_features import delta
from python_speech_features import logfbank
from python_speech_features import mfcc
from scipy.fftpack import fft
from scipy.io import wavfile  # get the api

WAV_DIR = "piano/wavs"
# Number of leading positive-frequency FFT bins kept per file (before the
# every-second-bin decimation applied when the row is written).
SPECTRUM_BINS = 6000


# Chunking function
def chunks(l, n):
    """Yield successive slices of ``l`` that are ``n`` items long.

    The last slice is shorter when ``len(l)`` is not a multiple of ``n``.
    """
    for i in range(0, len(l), n):
        yield l[i:i + n]


# Resampling function
def resample(arr, newLength):
    """Downsample ``arr`` by averaging consecutive fixed-size chunks.

    The chunk size is ``len(arr) // newLength``.  When ``len(arr)`` is not an
    exact multiple of ``newLength`` the trailing partial chunk is averaged
    too, so the result can hold more than ``newLength`` values.

    Raises:
        ValueError: if ``newLength`` is not between 1 and ``len(arr)``; the
            chunk size would otherwise be zero or undefined.
    """
    if newLength < 1:
        raise ValueError("newLength must be at least 1")
    chunkSize = len(arr) // newLength
    if chunkSize < 1:
        raise ValueError("newLength must not exceed len(arr)")
    return [np.mean(chunk) for chunk in chunks(arr, chunkSize)]


files = os.listdir(WAV_DIR)
counter = 0

# A single `with` closes all four outputs, even if processing fails midway.
with open("output_piano_note_present.csv", "w") as csvfile, \
        open("labels_piano_note_present.csv", "w") as csvfile1, \
        open("output_piano_note_not_present.csv", "w") as csvfile2, \
        open("labels_piano_note_not_present.csv", "w") as csvfile3:
    writer = csv.writer(csvfile, delimiter=',')
    writer1 = csv.writer(csvfile1, delimiter=',')
    writer2 = csv.writer(csvfile2, delimiter=',')
    writer3 = csv.writer(csvfile3, delimiter=',')

    for filename in files:
        if ".wav" not in filename:
            continue
        # Progress indicator: number of files converted so far.
        if counter % 100 == 0:
            print(counter)

        filename1 = os.path.join(WAV_DIR, filename)
        try:
            song = AudioSegment.from_wav(filename1)
        except OSError as err:
            # Best effort: report the unreadable file and carry on.
            sys.stderr.write("skipping %s: %s\n" % (filename1, err))
            continue

        song = song.set_channels(1)
        y = np.array(song.get_array_of_samples())
        n = len(y)
        if n == 0:
            # Nothing to transform; fft(y) / n would fail on an empty signal.
            sys.stderr.write("skipping %s: no samples\n" % filename1)
            continue

        # Normalised Fourier transform (complex values).  Bin i corresponds
        # to the frequency i * song.frame_rate / n Hz.
        Y = fft(y) / n
        # Only the first half is needed (real-signal symmetry); then keep the
        # leading SPECTRUM_BINS bins.
        Y = Y[:n // 2][:SPECTRUM_BINS]
        magnitudes = abs(Y[::2])

        if "C3" in filename:
            writer1.writerow(["1"])
            writer.writerow(magnitudes)
        else:
            writer3.writerow(["0"])
            writer2.writerow(magnitudes)
        counter += 1