-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathutils.py
70 lines (53 loc) · 2.11 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
"""Utility functions for MRNN.
Reference: Jinsung Yoon, William R. Zame and Mihaela van der Schaar,
"Estimating Missing Data in Temporal Data Streams Using
Multi-Directional Recurrent Neural Networks,"
in IEEE Transactions on Biomedical Engineering,
vol. 66, no. 5, pp. 1477-1490, May 2019.
Paper Link: https://ieeexplore.ieee.org/document/8485748
Contact: [email protected]
--------------------------------------------------
(1) MinMaxScaler
(2) Imputation performance
"""
# Necessary packages
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error
def MinMaxScaler(data):
    """Min-max normalize ``data`` along axis 0.

    The minimum along axis 0 is subtracted first; the shifted data is then
    divided by its own maximum along axis 0 plus a small constant (1e-8).

    Args:
      - data: raw input data

    Returns:
      - normalized_data: minmax normalized data
      - norm_parameters: dict for rescaling if needed, with keys
        'min_val' (minimum of the raw data along axis 0) and
        'max_val' (maximum of the *shifted* data along axis 0, plus 1e-8;
        i.e. the divisor that was actually applied)
    """
    offset = np.min(data, axis=0)
    shifted = data - offset
    # The added 1e-8 keeps the divisor non-zero when an entry never varies.
    scale = np.max(shifted, axis=0) + 1e-8
    return shifted / scale, {'min_val': offset, 'max_val': scale}
def imputation_performance(ori_x, imputed_x, m, metric_name):
    """Performance metrics for imputation.

    The error is measured only on entries flagged as missing (m == 0): for
    each feature, the element-wise error is averaged over all
    (sample, time step) rows with weights ``1 - m``; the per-feature errors
    are then averaged uniformly across features.

    Args:
      - ori_x: original complete data (without missing values), a 3-d array
        of shape (no, seq_len, dim)
      - imputed_x: imputed data from incomplete data, reshapeable to
        (no * seq_len, dim)
      - m: observation indicator, reshapeable to (no * seq_len, dim);
        ``1 - m`` is used as the weight of each entry
      - metric_name: mae, mse, or rmse

    Returns:
      - performance: imputation performance in terms of mae, mse, or rmse

    Raises:
      - ValueError: if metric_name is not one of 'mae', 'mse', 'rmse'
      - ZeroDivisionError: raised by np.average when the weights of some
        feature sum to zero (that feature has no missing entry)
    """
    # Explicit check instead of `assert`, which is skipped under `python -O`.
    if metric_name not in ('mae', 'mse', 'rmse'):
        raise ValueError(
            "metric_name must be one of 'mae', 'mse', 'rmse'; got %r"
            % (metric_name,))

    no, seq_len, dim = ori_x.shape

    # Reshape 3d array to 2d array
    ori_x = np.reshape(ori_x, [no * seq_len, dim])
    imputed_x = np.reshape(imputed_x, [no * seq_len, dim])
    m = np.reshape(m, [no * seq_len, dim])

    # Only the missing entries (m = 0) contribute to the metric.
    weights = 1 - m
    diff = ori_x - imputed_x

    # Computed directly with NumPy rather than by passing the weights
    # positionally to scikit-learn's metric functions, whose sample_weight
    # parameter is keyword-only in current releases.
    if metric_name == 'mae':
        element_error = np.abs(diff)
    else:
        element_error = diff ** 2

    # Weighted average per feature, then a uniform average over features.
    per_feature = np.average(element_error, axis=0, weights=weights)
    performance = np.mean(per_feature)

    if metric_name == 'rmse':
        performance = np.sqrt(performance)

    return performance