This repository has been archived by the owner on Dec 6, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
push_new_pricing_to_statusdb.py
executable file
·517 lines (423 loc) · 19.4 KB
/
push_new_pricing_to_statusdb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
#!/usr/bin/env python
"""
Reads the cost_calculator excel sheet and puts all that information
into statusdb.
"""
import argparse
from openpyxl import load_workbook
import coloredlogs
import logging
import yaml
from couchdb import Server
import datetime
from collections import OrderedDict
import pprint
FIRST_ROW = {'components': 9,
'products': 4}
SHEET = {'components': 'Price list',
'products': 'Products'}
# Skip columns which are calculated from the other fields
SKIP = {'components': ['Price', 'Total', 'Per unit'],
'products': ['Internal', 'External']}
# The name of the _id_ key and which variables that shouldn't be changed
# while keeping the same _id_. If an update of any of these fields is needed,
# a new id should be created.
CONSERVED_KEY_SETS = {'products': ['Category', 'Type', 'Name'],
'components': ['Category', 'Type', 'Product name']}
# The combination of these "columns" should be unique within the document,
# a warning will be issued otherwise
UNIQUE_KEY_SETS = {'products': ['Category', 'Type', 'Name'],
'components': ['Category', 'Type', 'Product name', 'Units']}
NOT_NULL_KEYS = {'products': ['REF_ID', 'Category', 'Type', 'Name', 'Re-run fee'],
'components': ['REF_ID', 'Category', 'Type', 'Status',
'Product name', 'Units', 'Currency',
'List price', 'Discount']}
MAX_NR_ROWS = 200
# Assuming the rows of products are sorted in the preferred order
# Set up a logger with colored output
logger = logging.getLogger('push_new_pricing_logger')
logger.propagate = False # Otherwise the messages appeared twice
coloredlogs.install(level='INFO', logger=logger,
fmt='%(asctime)s %(levelname)s %(message)s')
def check_unique(items, type):
"""Check whether all items within _items_
fulfill the uniqueness criteria according to the UNIQUE_KEY_SETS.
Otherwise warn accordingly.
"""
key_val_set = set()
for id, item in items.items():
keys = UNIQUE_KEY_SETS[type]
t = tuple(item[key] for key in keys)
# Check that it is not already added
if t in key_val_set:
logger.warning("Key combination {}:{} is included multiple "
"times in the {} sheet. ".format(keys, t, type))
key_val_set.add(t)
return True
def check_conserved(new_items, current_items, type):
"""Ensures the keys in CONSERVED_KEY_SETS are conserved for each given id.
Compares the new version against the currently active one.
Params:
new_items - A dict of the items that are to be added
with ID attribute as the key.
current_items - A dict of the items currently in the database
with ID attribute as the key.
type - Either "components" or "products"
"""
conserved_keys = CONSERVED_KEY_SETS[type]
for id, new_item in new_items.items():
if str(id) in current_items:
for conserved_key in conserved_keys:
if conserved_key not in new_item:
logger.warning("{} column not found in new {} row with "
"id {}. This column should be kept "
"conserved.".format(conserved_key, type, id))
if new_item[conserved_key] != current_items[str(id)][conserved_key]:
logger.warning("{} should be conserved for {}. "
"Violated for item with id {}. "
"Found \"{}\" for new and \"{}\" for current. ".format(
conserved_key, type,
id, new_item[conserved_key],
current_items[str(id)][conserved_key]))
return True
def check_not_null(items, type):
"""Make sure type specific columns (given by NOT_NULL_KEYS) are not null."""
not_null_keys = NOT_NULL_KEYS[type]
for id, item in items.items():
for not_null_key in not_null_keys:
if item[not_null_key] is None or item[not_null_key] == '':
# Special case for discontinued components
if 'Status' in item and item['Status'] == 'Discontinued':
pass
else:
raise ValueError("{} cannot be empty for {}."
" Violated for item with id {}.".\
format(not_null_key, type, id))
def check_discontinued(components, products):
"""Make sure no discontinued components are used for enabled products."""
for product_id, product in products.items():
component_ids = []
if product["Components"]:
component_ids += list(product["Components"].keys())
if product["Alternative Components"]:
component_ids += list(product["Alternative Components"].keys())
for component_id in component_ids:
if product["Status"] == "Enabled":
if components[component_id]["Status"] == "Discontinued":
logger.warning(("Product {}:\"{}\" uses the discontinued component "
"{}:\"{}\", changing product status to \"discontinued\"").\
format(product_id, products[product_id]["Name"], \
component_id, components[component_id]["Product name"]))
product["Status"] = "Discontinued"
def get_current_items(db, type):
rows = db.view("entire_document/by_version", descending=True, limit=1).rows
if len(rows) != 0:
doc = rows[0].value
return doc[type]
return {}
def is_empty_row(comp):
for k, v in comp.items():
if v != '':
return False
return True
def load_products(wb):
ws = wb[SHEET['products']]
row = FIRST_ROW['products']
header_row = row - 1
header_cells = ws[header_row]
header = {}
product_price_columns = {}
for cell in header_cells:
cell_val = cell.value
if cell_val == 'ID':
cell_val = 'REF_ID' # Don't want to confuse it with couchdb ids
# Get cell column as string
cell_column = cell.coordinate.replace(str(header_row), '')
if cell_val not in SKIP['products']:
header[cell_column] = cell_val
else:
# save a lookup to find column of prices
product_price_columns[cell_val] = cell_column
products = OrderedDict()
# Unkown number of rows
while row < MAX_NR_ROWS:
new_product = {}
fetch_prices = False # default behaviour
for col, header_val in header.items():
val = ws["{}{}".format(col, row)].value
if val is None:
val = ''
if header_val in ['Components', 'Alternative Components']:
# Some cells might be interpreted as floats
# e.g. "37,78"
val = str(val)
val = val.replace('.', ',')
if val:
val_list = []
for comp_id in val.split(','):
try:
int(comp_id)
except ValueError:
print("Product on row {} has component with "
"invalid id {}: not an integer, "
" aborting!".format(row, comp_id))
raise
# Make a list with all individual components
val_list.append(comp_id)
val = {comp_ref_id: {'quantity': 1} for comp_ref_id in val_list}
elif header_val == 'Components':
# If no components are listed, price should be fetched as well,
# unless the row is in fact empty.
if not is_empty_row(new_product):
fetch_prices = True
# Comment column occurs after the price columns, so
# checking for this ensures that the prices have been parsed
if (header_val == 'Comment') and fetch_prices:
# Fixed price is added when price does not
# directly depend on the components
new_product['fixed_price'] = {}
int_cell = "{}{}".format(
product_price_columns['Internal'],
row
)
ext_cell = "{}{}".format(
product_price_columns['External'],
row
)
new_product['fixed_price']['price_in_sek'] = ws[int_cell].value
new_product['fixed_price']['external_price_in_sek'] = ws[ext_cell].value
new_product[header_val] = val
if not is_empty_row(new_product):
# The id seems to be stored as a string in the database
# so might as well always have the ids as strings.
product_id = str(new_product['REF_ID'])
# Prepare for a status value on products
if 'Status' not in new_product:
new_product['Status'] = "Enabled"
products[product_id] = new_product
row += 1
return products
def load_components(wb):
ws = wb[SHEET['components']]
# Unkown number of rows
row = FIRST_ROW['components']
header_row = row - 1
header_cells = ws[header_row]
header = {}
for cell in header_cells:
cell_val = cell.value
if cell_val == 'ID':
cell_val = 'REF_ID' # Don't want to confuse it with couchdb ids
if cell_val not in SKIP['components']:
# Get cell column as string
cell_column = cell.coordinate.replace(str(header_row), '')
header[cell_column] = cell_val
components = {}
while row < MAX_NR_ROWS:
new_component = {}
for col, header_val in header.items():
val = ws["{}{}".format(col, row)].value
if val is None:
val = ''
elif header_val == 'REF_ID':
# The id seems to be stored as a string in the database
# so might as well always have the ids as strings.
try:
int(val)
except ValueError:
print("ID value {} for row {} is not an id, "
"aborting.".format(val, row))
val = str(val)
new_component[header_val] = val
if new_component['REF_ID'] in components:
# Violates the uniqueness of the ID
raise ValueError("ID {} is included multiple "
"times in the {} sheet. "
"ABORTING.".format(new_component['REF_ID'], type))
if not is_empty_row(new_component):
components[new_component['REF_ID']] = new_component
row += 1
return components
def get_current_version(db):
view_result = db.view('entire_document/by_version', limit=1,
descending=True)
if view_result.rows:
return int(view_result.rows[0].value['Version'])
else:
return 0
def compare_two_objects(obj1, obj2, ignore_updated_time=True):
# Make copies in order to ignore fields
obj1_copy = obj1.copy()
obj2_copy = obj2.copy()
if ignore_updated_time:
if 'Last Updated' in obj1_copy:
obj1_copy.pop('Last Updated')
if 'Last Updated' in obj2_copy:
obj2_copy.pop('Last Updated')
return obj1_copy == obj2_copy
def set_last_updated_field(new_objects, current_objects, object_type):
# if object is not found or changed in current set last updated field
now = datetime.datetime.now().isoformat()
for id in list(new_objects.keys()):
updated = False
if id in current_objects:
# Beware! This simple == comparison is quite brittle. Sensitive to
# str vs int and such.
the_same = compare_two_objects(new_objects[id],
current_objects[id])
if not the_same:
updated = True
else:
updated = True
if updated:
print("Updating {}: {}".format(object_type, id))
new_objects[id]['Last Updated'] = now
else:
new_objects[id]['Last Updated'] = current_objects[id]['Last Updated']
return new_objects
def main_push(input_file, config, user, user_email,
add_components=False, add_products=False, push=False):
with open(config) as settings_file:
server_settings = yaml.load(settings_file, Loader=yaml.SafeLoader)
couch = Server(server_settings.get("couch_server", None))
wb = load_workbook(input_file, read_only=True, data_only=True)
# setup a default doc that will be pushed
doc = {}
doc['Issued by user'] = user
doc['Issued by user email'] = user_email
doc['Issued at'] = datetime.datetime.now().isoformat()
# A newly pushed document is always a draft
doc['Draft'] = True
# --- Components --- #
comp_db = couch['pricing_components']
components = load_components(wb)
check_unique(components, 'components')
check_not_null(components, 'components')
current_components = get_current_items(comp_db, 'components')
if current_components:
check_conserved(components, current_components, 'components')
# Modify the `last updated`-field of each item
components = set_last_updated_field(components,
current_components,
'component')
# Save it but push it only if products are also parsed correctly
comp_doc = doc.copy()
comp_doc['components'] = components
current_version = get_current_version(comp_db)
comp_doc['Version'] = current_version + 1
# --- Products --- #
prod_db = couch['pricing_products']
products = load_products(wb)
check_unique(products, 'products')
check_not_null(products, 'products')
current_products = get_current_items(prod_db, 'products')
if current_products:
check_conserved(products, current_products, 'products')
# Modify the `last updated`-field of each item
products = set_last_updated_field(products,
current_products,
'product')
prod_doc = doc.copy()
prod_doc['products'] = products
current_version = get_current_version(prod_db)
prod_doc['Version'] = current_version + 1
# Verify no discontinued components are used for enabled products
check_discontinued(components, products)
# --- Push or Print --- #
if push:
comp_db = couch['pricing_components']
prod_db = couch['pricing_products']
curr_comp_rows = comp_db.view("entire_document/by_version", descending=True, limit=1).rows
curr_prod_rows = prod_db.view("entire_document/by_version", descending=True, limit=1).rows
# Check that the latest one is not a draft
if (len(curr_comp_rows) == 0) or (len(curr_prod_rows) == 0):
print("No current version found. This will be the first!")
else:
curr_comp_doc = curr_comp_rows[0].value
curr_prod_doc = curr_prod_rows[0].value
if curr_comp_doc['Draft'] or curr_prod_doc['Draft']:
print("Most recent version is a draft. Please remove or "
"publish this one before pushing a new draft. Aborting!")
return
logger.info(
'Pushing components document version {}'.format(comp_doc['Version'])
)
comp_db.save(comp_doc)
logger.info(
'Pushing products document version {}'.format(prod_doc['Version'])
)
prod_db.save(prod_doc)
else:
# Prettyprint the json output
pprint.pprint(comp_doc)
pprint.pprint(prod_doc)
def main_publish(config, user, user_email, dryrun=True):
with open(config) as settings_file:
server_settings = yaml.load(settings_file, Loader=yaml.SafeLoader)
couch = Server(server_settings.get("couch_server", None))
comp_db = couch['pricing_components']
prod_db = couch['pricing_products']
comp_rows = comp_db.view("entire_document/by_version", descending=True, limit=1).rows
prod_rows = prod_db.view("entire_document/by_version", descending=True, limit=1).rows
if (len(comp_rows) == 0) or (len(prod_rows) == 0):
print("No draft version found to publish. Aborting!")
return
comp_doc = comp_rows[0].value
prod_doc = prod_rows[0].value
if (not comp_doc['Draft']) or (not prod_doc['Draft']):
print("Most recent version is not a draft. Aborting!")
return
now = datetime.datetime.now().isoformat()
comp_doc['Draft'] = False
comp_doc['Published'] = now
prod_doc['Draft'] = False
prod_doc['Published'] = now
if not dryrun:
logger.info(
'Pushing components document version {}'.format(comp_doc['Version'])
)
comp_db.save(comp_doc)
logger.info(
'Pushing products document version {}'.format(prod_doc['Version'])
)
prod_db.save(prod_doc)
else:
print(prod_doc, comp_doc)
if __name__ == '__main__':
parser = argparse.ArgumentParser(description=__doc__)
subparsers = parser.add_subparsers(
title='actions',
dest='subcommand_name',
description="Either 'push' for uploading a draft "
"pricing version or 'publish' to make "
"the current draft the latest version"
)
push_parser = subparsers.add_parser('push')
push_parser.add_argument('pricing_excel_file',
help="The excel file currently used for pricing")
push_parser.add_argument('--statusdb_config', required=True,
help='The genomics-status settings.yaml file.')
push_parser.add_argument('--push', action='store_true',
help='Use this tag to actually push to the databse,'
' otherwise it is just dryrun')
push_parser.add_argument('--user', required=True,
help='User that change the document')
push_parser.add_argument('--user_email', required=True,
help='Email for the user who changed the document')
publish_parser = subparsers.add_parser('publish')
publish_parser.add_argument('--statusdb_config', required=True,
help='The genomics-status settings.yaml file.')
publish_parser.add_argument('--user', required=True,
help='User that change the document')
publish_parser.add_argument('--user_email', required=True,
help='Email for the user who changed the document')
publish_parser.add_argument('--dryrun', action='store_true',
help="Use this tag to only print what would "
"have been done")
args = parser.parse_args()
if args.subcommand_name == 'push':
main_push(args.pricing_excel_file, args.statusdb_config, args.user,
args.user_email, push=args.push)
elif args.subcommand_name == 'publish':
main_publish(args.statusdb_config, args.user, args.user_email,
dryrun=args.dryrun)