-
Notifications
You must be signed in to change notification settings - Fork 0
/
2-7-specifying-additional-variable-information.py
39 lines (31 loc) · 1.27 KB
/
2-7-specifying-additional-variable-information.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# 2.7 Specifying Additional Variable Information
import os
import settings as st
from revoscalepy import rx_import, rx_get_info
# --- Step 1: locate the delimited text input and the .xdf output ----------
inFileAddVars = os.path.join(st.SAMPLE_DATA_DIR, 'claims.txt')

# NOTE: the original author observed that
#   RxOptions.set_option("OutDataPath", [RESULTS_LOCATION])
# is not taken into account (possibly a bug), so the output file is given
# as a full path instead of relying on that option.
outfileTypeRelabeled = os.path.join(
    st.RESULTS_LOCATION,
    'claimsTypeRelabeled.xdf',
)

# --- Step 2: describe the extra column information ------------------------
# Spec for the 'type' column: treat it as a factor, list its existing levels
# ('A'..'D'), give the replacement labels, and attach a description.
body_type_spec = {
    'type': 'factor',
    'levels': ['A', 'B', 'C', 'D'],
    'newLevels': ['Subcompact', 'Compact', 'Mid-size', 'Full-size'],
    'description': 'Body Type',
}
# Outer key is the column name the spec applies to.
colInfoList = {'type': body_type_spec}

# --- Step 3: import the text file, applying the column information --------
# NOTE: per the original author's observation, and contrary to the
# documentation, rx_import returns a <class 'bool'> rather than an RxXdfData
# object when an output file is specified, so the return value is not kept.
# See: https://docs.microsoft.com/en-us/machine-learning-server/python-reference/revoscalepy/rx-import#returns
rx_import(
    input_data=inFileAddVars,
    output_file=outfileTypeRelabeled,
    column_info=colInfoList,
    overwrite=True,
)

# --- Step 4: read the written .xdf back and display the result ------------
# With no output file given, the result is used below like a data frame
# (it must provide .head()).
claims_data_frame = rx_import(outfileTypeRelabeled)

# Variable-level information is requested via get_var_info=True.
info = rx_get_info(claims_data_frame, get_var_info=True)
print(info)
print(claims_data_frame.head())