feature_extraction.py
import os

import librosa
import numpy as np
import pandas as pd


# Function to extract MFCC features from an audio file
def extract_mfcc_features(file_path, n_mfcc=40):
    audio_data, sampling_rate = librosa.load(file_path)
    mfccs = librosa.feature.mfcc(y=audio_data, sr=sampling_rate, n_mfcc=n_mfcc, n_mels=n_mfcc)
    return mfccs.T  # Transpose so rows are frames and columns are MFCC coefficients
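
# A minimal sketch of how the helper above might be used on its own (the file name
# "example.wav" is only a placeholder, not part of this project): the returned array
# has shape (n_frames, n_mfcc), so averaging over axis 0 collapses it to a single
# fixed-length vector per recording.
#
#     mfccs = extract_mfcc_features("example.wav", n_mfcc=40)  # shape: (n_frames, 40)
#     mfcc_vector = np.mean(mfccs, axis=0)                     # shape: (40,)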

# Function to extract additional features (chroma, spectral contrast, spectral bandwidth, etc.)
def extract_additional_features(file_path, feature_types=None):
    if feature_types is None:
        feature_types = []
    audio_data, sampling_rate = librosa.load(file_path)
    features = {}

    # Chroma features represent the distribution of energy across musical pitch classes.
    # They capture the harmonic content and tonal characteristics of the audio.
    if 'chroma' in feature_types:
        chroma = librosa.feature.chroma_stft(y=audio_data, sr=sampling_rate)
        features['chroma'] = np.mean(chroma.T, axis=0)

    # Spectral contrast measures the difference in amplitude between peaks and valleys in the
    # spectrum. It highlights the distinction between harmonic and non-harmonic components.
    if 'spectral_contrast' in feature_types:
        spectral_contrast = librosa.feature.spectral_contrast(y=audio_data, sr=sampling_rate)
        features['spectral_contrast'] = np.mean(spectral_contrast.T, axis=0)

    # Spectral bandwidth represents the width of the spectral band. Higher values may indicate
    # a broader spread of frequencies in the signal.
    if 'spectral_bandwidth' in feature_types:
        spectral_bandwidth = librosa.feature.spectral_bandwidth(y=audio_data, sr=sampling_rate)
        features['spectral_bandwidth'] = np.mean(spectral_bandwidth.T, axis=0)

    # Spectral centroid indicates the "center of mass" of the spectrum, providing a measure of
    # where the "center" of the frequencies lies. It can offer insights into the perceived
    # brightness or tonal quality of the sound.
    if 'spectral_centroid' in feature_types:
        spectral_centroid = librosa.feature.spectral_centroid(y=audio_data, sr=sampling_rate)
        features['spectral_centroid'] = np.mean(spectral_centroid)

    # Spectral rolloff is the frequency below which a certain percentage of the total spectral
    # energy lies. It gives an indication of the spread of higher frequencies in the signal.
    if 'spectral_rolloff' in feature_types:
        spectral_rolloff = librosa.feature.spectral_rolloff(y=audio_data, sr=sampling_rate)
        features['spectral_rolloff'] = np.mean(spectral_rolloff)

    # Zero-crossing rate measures the rate at which the signal changes its sign. It is useful
    # for capturing characteristics related to the noisiness or percussiveness of the audio.
    if 'zero_crossing_rate' in feature_types:
        zero_crossing_rate = librosa.feature.zero_crossing_rate(y=audio_data)
        features['zero_crossing_rate'] = np.mean(zero_crossing_rate)

    # RMS (root mean square) energy is the square root of the mean squared signal values.
    # It measures the signal's energy and is indicative of overall amplitude or loudness.
    if 'rmse' in feature_types:
        rmse = librosa.feature.rms(y=audio_data)
        features['rmse'] = np.mean(rmse)

    # Tempo is estimated from the onset strength envelope via beat tracking.
    if 'tempo' in feature_types:
        onset_env = librosa.onset.onset_strength(y=audio_data, sr=sampling_rate)
        tempo, _ = librosa.beat.beat_track(onset_envelope=onset_env, sr=sampling_rate)
        features['tempo'] = tempo

    return features
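
# A minimal usage sketch for the function above (again assuming a placeholder
# "example.wav"): the result is a dict keyed by the requested feature names, where
# chroma and spectral_contrast are small vectors and the remaining entries are scalars.
#
#     feats = extract_additional_features("example.wav", ['chroma', 'tempo'])
#     # feats['chroma'] -> 12-dimensional pitch-class vector, feats['tempo'] -> estimated BPM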

def process_audio_files(directory, output_csv, n_mfcc=40, additional_features=None, normalize_columns=False):
    if additional_features is None:
        additional_features = []

    # Prepare one column per MFCC coefficient plus one per additional feature
    data = {'species': []}
    for i in range(n_mfcc):
        data[f'mfcc_{i}'] = []
    for feature_type in additional_features:
        data[feature_type] = []

    # Count the .wav files up front so the progress message has a meaningful total
    total = sum(1 for _, _, fs in os.walk(directory) for f in fs if f.endswith(".wav"))

    count = 0
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file.endswith(".wav"):
                file_path = os.path.join(root, file)
                count += 1
                print(f"{count}/{total} | Extracting features from: {file_path}")
                subfolder_name = os.path.basename(root)

                # Extract MFCC features and average them over time so each file
                # yields exactly one value per coefficient
                mfccs = extract_mfcc_features(file_path, n_mfcc)
                mfccs_scaled_features = np.mean(mfccs, axis=0)

                # Extract additional features
                additional_features_dict = extract_additional_features(file_path, additional_features)

                # Append data to the dictionary; the subfolder name is used as the species label
                data['species'].append(subfolder_name)
                for i in range(n_mfcc):
                    data[f'mfcc_{i}'].append(mfccs_scaled_features[i])

                # Append additional features (vector-valued features are averaged to a scalar)
                for feature_type in additional_features:
                    data[feature_type].append(np.mean(additional_features_dict[feature_type]))

    # Optionally normalize the additional feature columns (MFCC columns are left as-is)
    if normalize_columns:
        for feature_type in additional_features:
            data[feature_type] = librosa.util.normalize(np.array(data[feature_type]), axis=0)

    df = pd.DataFrame(data)
    df.to_csv(output_csv, index=False)

def main():
    data_processed_dir = "Data_Processed"
    output_csv = "features.csv"
    n_mfcc = 40
    additional_features = ['chroma',
                           'spectral_contrast',
                           'spectral_bandwidth',
                           'spectral_centroid',
                           'spectral_rolloff',
                           'zero_crossing_rate',
                           'rmse',
                           'tempo'
                           ]
    normalize_columns = True
    process_audio_files(data_processed_dir, output_csv, n_mfcc, additional_features, normalize_columns)
    print(f"Features saved to {output_csv}")


if __name__ == "__main__":
    main()
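
# A minimal sketch of inspecting the generated CSV (assumes the script has already been
# run and features.csv exists in the working directory); this is illustrative, not part
# of the extraction pipeline itself.
#
#     import pandas as pd
#     df = pd.read_csv("features.csv")
#     print(df.shape)                      # rows = audio files, columns = species + features
#     print(df['species'].value_counts())  # recordings per species/subfolder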