generated from drkostas/template_python_project
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathconfiguration.py
205 lines (171 loc) · 7.2 KB
/
configuration.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
import os
import logging
from typing import Dict, List, Tuple, Union
import json
import _io
from io import StringIO, TextIOWrapper
import re
import yaml
from jsonschema import validate as validate_json_schema
# Module-level logger, registered under the fixed name 'Configuration'.
logger = logging.getLogger('Configuration')
class Configuration:
    """Handles the loading of the configuration settings from a yml file.

    The parsed yml is validated against a JSON schema and every known
    top-level section is exposed as an instance attribute. While parsing,
    scalars containing ``${VAR}`` get ``VAR`` substituted with the value of
    the environment variable of that name.

    Attributes:
        config: The full parsed configuration.
        config_path: Where the configuration was loaded from.
        datastore, spark, input, run_options, output: The matching top-level
            section of the configuration, or None when it is absent.
        tag: Value of the top-level ``tag`` key.
        config_attributes: Names of the sections present in this instance's
            configuration.
        env_variable_tag: yaml tag attached to scalars that reference
            environment variables.
        env_variable_pattern: Regex that detects ``${VAR}`` references.
    """

    __slots__ = ('config', 'config_path', 'datastore', 'spark', 'input', 'run_options', 'output', 'tag',
                 'config_attributes')

    config: Dict
    config_path: str
    datastore: Dict
    spark: Dict
    input: Dict
    run_options: Dict
    output: Dict
    tag: str
    # Per-instance slot. A class-level ``[]`` default would be shared (and
    # appended to) by every Configuration created in the process.
    config_attributes: List
    env_variable_tag: str = '!ENV'
    env_variable_pattern: str = r'.*?\${(\w+)}.*?'  # ${var}

    def __init__(self, config_src: Union[TextIOWrapper, StringIO, str], config_schema_path: str = 'yml_schema.json'):
        """Load, validate and expose the configuration.

        Args:
            config_src: An open text file, a StringIO holding yml text, or
                the path of a yml file.
            config_schema_path (str): Path of the JSON schema file, resolved
                against the directory that contains this module.

        Raises:
            TypeError: If ``config_src`` is of an unsupported type.
            KeyError: If the loaded configuration has no ``tag`` key.

        Errors raised by the JSON schema validation propagate unchanged.
        """
        logger.info("Initializing Configuration..")
        # Load the predefined schema of the configuration
        configuration_schema = self.load_configuration_schema(config_schema_path=config_schema_path)
        # Load the configuration
        self.config, self.config_path = self.load_yml(config_src=config_src,
                                                      env_tag=self.env_variable_tag,
                                                      env_pattern=self.env_variable_pattern)
        logger.debug("Loaded config: %s", self.config)
        # Validate the config
        validate_json_schema(self.config, configuration_schema)
        # Set the config properties as instance attributes
        self.tag = self.config['tag']
        self.config_attributes = []
        all_config_attributes = ('datastore', 'spark', 'input', 'run_options', 'output')
        for config_attribute in all_config_attributes:
            if config_attribute in self.config:
                setattr(self, config_attribute, self.config[config_attribute])
                self.config_attributes.append(config_attribute)
            else:
                # Absent sections are still readable; they are just None.
                setattr(self, config_attribute, None)

    @staticmethod
    def load_configuration_schema(config_schema_path: str) -> Dict:
        """Read the JSON schema used to validate the configuration.

        Args:
            config_schema_path (str): Path of the schema file, resolved
                against the directory that contains this module.

        Returns:
            The parsed JSON schema.
        """
        module_dir = os.path.dirname(os.path.realpath(__file__))
        with open(os.path.join(module_dir, config_schema_path)) as f:
            return json.load(f)

    @staticmethod
    def load_yml(config_src: Union[TextIOWrapper, StringIO, str], env_tag: str, env_pattern: str) -> Tuple[Dict, str]:
        """Load a yml document, expanding ``${VAR}`` environment references.

        Args:
            config_src: An open text file, a StringIO holding yml text, or
                the path of a yml file.
            env_tag (str): yaml tag to attach to scalars matching
                ``env_pattern``.
            env_pattern (str): Regex whose first group captures the name of
                the referenced environment variable.

        Returns:
            A ``(config, config_path)`` tuple. ``config_path`` is the
            stream's ``name`` for a TextIOWrapper, the literal ``"StringIO"``
            for a StringIO, or the given path for a str.

        Raises:
            TypeError: If ``config_src`` is none of the supported types.
        """
        pattern = re.compile(env_pattern)

        class _EnvVarLoader(yaml.SafeLoader):
            """SafeLoader subclass that owns the env-variable hooks.

            Registering on a subclass keeps the resolver/constructor out of
            the process-wide ``yaml.SafeLoader``, which would otherwise
            collect one more duplicate resolver on every call.
            """

        def constructor_env_variables(loader, node):
            """Replace each ``${VAR}`` in the node's scalar value.

            Args:
                loader: The yaml loader constructing the node.
                node: The current scalar node.

            Returns:
                The scalar with every ``${VAR}`` replaced by the value of
                the environment variable ``VAR``, or by the bare name
                ``VAR`` when that variable is not set.
            """
            value = loader.construct_scalar(node)
            # findall yields the captured variable names, one per reference
            for var_name in pattern.findall(value):
                value = value.replace(f'${{{var_name}}}', os.environ.get(var_name, var_name))
            return value

        _EnvVarLoader.add_implicit_resolver(env_tag, pattern, None)
        _EnvVarLoader.add_constructor(env_tag, constructor_env_variables)

        if isinstance(config_src, TextIOWrapper):
            logger.debug("Loading yaml from TextIOWrapper")
            config = yaml.load(config_src, Loader=_EnvVarLoader)
            config_path = config_src.name
        elif isinstance(config_src, StringIO):
            logger.debug("Loading yaml from StringIO")
            config = yaml.load(config_src, Loader=_EnvVarLoader)
            config_path = "StringIO"
        elif isinstance(config_src, str):
            logger.debug("Loading yaml from path")
            with open(config_src) as f:
                config = yaml.load(f, Loader=_EnvVarLoader)
            config_path = config_src
        else:
            raise TypeError('Config file must be TextIOWrapper or path to a file')
        return config, config_path

    def _get_section(self, name: str):
        """Return the section called ``name`` or raise if it was not set."""
        if name not in self.config_attributes:
            raise ConfigurationError(f'Config property {name} not set!')
        return getattr(self, name)

    def get_datastore_configs(self) -> List[Dict]:
        """Returns the ``config`` entry of every datastore item.

        Raises:
            ConfigurationError: If the datastore section is not set.
        """
        return [sub_config['config'] for sub_config in self._get_section('datastore')]

    def get_spark_configs(self) -> List[Dict]:
        """Returns the spark items as a new list.

        Raises:
            ConfigurationError: If the spark section is not set.
        """
        return list(self._get_section('spark'))

    def get_input_configs(self) -> List[Dict]:
        """Returns the ``config`` entry of every input item.

        Raises:
            ConfigurationError: If the input section is not set.
        """
        return [sub_config['config'] for sub_config in self._get_section('input')]

    def get_run_options_configs(self) -> List[Dict]:
        """Returns the ``config`` entry of every run_options item.

        Raises:
            ConfigurationError: If the run_options section is not set.
        """
        return [sub_config['config'] for sub_config in self._get_section('run_options')]

    def get_output_configs(self) -> List[Dict]:
        """Returns the ``config`` entry of every output item.

        Raises:
            ConfigurationError: If the output section is not set.
        """
        return [sub_config['config'] for sub_config in self._get_section('output')]

    def to_yml(self, fn: Union[str, _io.TextIOWrapper]) -> None:
        """Writes the configuration to a stream. For example a file.

        Args:
            fn: Path of the file to (over)write, or an already open
                TextIOWrapper to dump into.

        Returns:
            None

        Raises:
            TypeError: If ``fn`` is neither a str nor a TextIOWrapper.
        """
        dict_conf = self.to_json()
        if isinstance(fn, str):
            with open(fn, 'w') as f:
                yaml.dump(dict_conf, f, default_flow_style=False)
        elif isinstance(fn, _io.TextIOWrapper):
            yaml.dump(dict_conf, fn, default_flow_style=False)
        else:
            raise TypeError('Expected str or _io.TextIOWrapper not %s' % (type(fn)))

    to_yaml = to_yml

    def to_json(self) -> Dict:
        """Returns the set sections plus ``tag`` as a plain dict."""
        dict_conf = {config_attribute: getattr(self, config_attribute)
                     for config_attribute in self.config_attributes}
        dict_conf['tag'] = self.tag
        return dict_conf

    def __getitem__(self, item):
        """Allows ``configuration['name']`` as an alias of attribute access.

        Args:
            item: Name of the attribute to read.

        Raises:
            AttributeError: If there is no attribute called ``item``.
        """
        return getattr(self, item)
class ConfigurationError(Exception):
    """Error raised for problems with the loaded configuration."""

    def __init__(self, message):
        """Create the error with a single descriptive message.

        Args:
            message: Text describing what is wrong; it becomes the
                exception's only argument.
        """
        # Hand the message to Exception so str(error) returns it.
        super(ConfigurationError, self).__init__(message)