### THIS FILE CONTAINS COMMON FUNCTIONS, CLASSES
import tqdm
import time
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
from torch import nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from scipy.io import wavfile as wav
from sklearn import preprocessing
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, precision_score, recall_score, confusion_matrix

def split_dataset(df, columns_to_drop, test_size, random_state):
    # Encode string labels as integers, then split into train/test frames.
    label_encoder = preprocessing.LabelEncoder()
    df['label'] = label_encoder.fit_transform(df['label'])
    df_train, df_test = train_test_split(df, test_size=test_size, random_state=random_state)
    # Drop label/metadata columns from the feature frames; keep labels as numpy arrays.
    df_train2 = df_train.drop(columns_to_drop, axis=1)
    y_train2 = df_train['label'].to_numpy()
    df_test2 = df_test.drop(columns_to_drop, axis=1)
    y_test2 = df_test['label'].to_numpy()
    return df_train2, y_train2, df_test2, y_test2
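
# Usage sketch (hypothetical column names): the frame must contain a 'label'
# column; anything in columns_to_drop is removed from the feature matrix.
# The 'filename' column below is illustrative only.
#   X_train, y_train, X_test, y_test = split_dataset(
#       df, columns_to_drop=['filename', 'label'], test_size=0.3, random_state=0)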

def preprocess_dataset(df_train, df_test):
    # Fit the scaler on the training split only, then apply the same
    # transform to the test split to avoid leaking test statistics.
    standard_scaler = preprocessing.StandardScaler()
    df_train_scaled = standard_scaler.fit_transform(df_train)
    df_test_scaled = standard_scaler.transform(df_test)
    return df_train_scaled, df_test_scaled
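
# Usage sketch: scale after splitting, never before, so test statistics
# never influence the scaler.
#   X_train_s, X_test_s = preprocess_dataset(X_train, X_test)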

def set_seed(seed=0):
    '''Seed Python, NumPy and PyTorch RNGs for reproducible runs.'''
    random.seed(seed)
    torch.manual_seed(seed)
    np.random.seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True

# Early stopping helper, adapted from the course tutorial.
class EarlyStopper:
    def __init__(self, patience=3, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.min_validation_loss = np.inf

    def early_stop(self, validation_loss):
        # Reset the counter whenever validation loss improves; otherwise count
        # epochs where the loss exceeds the best value by more than min_delta.
        if validation_loss < self.min_validation_loss:
            self.min_validation_loss = validation_loss
            self.counter = 0
        elif validation_loss > (self.min_validation_loss + self.min_delta):
            self.counter += 1
            if self.counter >= self.patience:
                return True
        return False
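
# Usage sketch (the validate() helper is assumed, not defined here): call
# early_stop once per epoch with the current validation loss and break when
# it returns True.
#   stopper = EarlyStopper(patience=3, min_delta=0.0)
#   for epoch in range(max_epochs):
#       val_loss = validate(model, val_loader)  # hypothetical helper
#       if stopper.early_stop(val_loss):
#           break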

class MLP(nn.Module):
    '''Three-hidden-layer perceptron; no_hidden is a list of three widths.'''
    def __init__(self, no_features, no_hidden, no_labels):
        super().__init__()
        self.mlp_stack = nn.Sequential(
            nn.Linear(no_features, no_hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(no_hidden[0], no_hidden[1]),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(no_hidden[1], no_hidden[2]),
            nn.ReLU(),
            nn.Dropout(0.2),
            # Output raw logits: nn.BCEWithLogitsLoss (defined below) applies
            # the sigmoid internally, so an explicit nn.Sigmoid() here would
            # apply it twice and distort the gradients.
            nn.Linear(no_hidden[2], no_labels),
        )

    def forward(self, x):
        return self.mlp_stack(x)
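
# Instantiation sketch (all widths are illustrative, not the project's
# settings): a binary classifier over 10 input features with three hidden
# layers of width 64 and a single logit output.
#   model = MLP(no_features=10, no_hidden=[64, 64, 64], no_labels=1)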

class CustomDataset(Dataset):
    '''Wraps feature/label arrays as float32 tensors for use with a DataLoader.'''
    def __init__(self, X, y):
        X = np.array(X, dtype=np.float32)
        y = np.array(y, dtype=np.float32)
        self.X = torch.from_numpy(X)
        self.y = torch.from_numpy(y)

    def __getitem__(self, index):
        return self.X[index], self.y[index]

    def __len__(self):
        return len(self.X)
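
# Usage sketch: pair the dataset with the DataLoader imported above
# (batch size is illustrative).
#   train_loader = DataLoader(CustomDataset(X_train, y_train),
#                             batch_size=32, shuffle=True)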

# Binary cross-entropy on raw logits (sigmoid is applied inside the loss).
loss_fn = nn.BCEWithLogitsLoss()
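
# Minimal end-to-end sketch on synthetic data, showing how the pieces above
# fit together. Every shape and hyperparameter here is illustrative only,
# not the settings used in the accompanying notebooks.
if __name__ == '__main__':
    set_seed(0)
    # Synthetic two-class dataset: 200 samples, 10 features.
    df = pd.DataFrame(np.random.randn(200, 10),
                      columns=[f'f{i}' for i in range(10)])
    df['label'] = np.random.choice(['a', 'b'], size=200)
    X_train, y_train, X_test, y_test = split_dataset(
        df, columns_to_drop=['label'], test_size=0.3, random_state=0)
    X_train, X_test = preprocess_dataset(X_train, X_test)

    train_loader = DataLoader(CustomDataset(X_train, y_train),
                              batch_size=32, shuffle=True)
    model = MLP(no_features=10, no_hidden=[64, 64, 64], no_labels=1)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    stopper = EarlyStopper(patience=3)

    for epoch in range(5):
        model.train()
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            logits = model(X_batch).squeeze(1)
            loss = loss_fn(logits, y_batch)
            loss.backward()
            optimizer.step()
        # The test split stands in for a validation split in this sketch.
        model.eval()
        with torch.no_grad():
            val_logits = model(torch.from_numpy(X_test.astype(np.float32))).squeeze(1)
            val_loss = loss_fn(val_logits,
                               torch.from_numpy(y_test.astype(np.float32))).item()
        print(f'epoch {epoch}: val_loss={val_loss:.4f}')
        if stopper.early_stop(val_loss):
            break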