Skip to content

Commit

Permalink
Issue #536 - Remove pickle from config caching (#537)
Browse files Browse the repository at this point in the history
  • Loading branch information
nttoole authored Oct 25, 2024
1 parent 61001ed commit dc9280b
Show file tree
Hide file tree
Showing 7 changed files with 197 additions and 162 deletions.
37 changes: 32 additions & 5 deletions ait/core/dmc.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,13 @@
import datetime
import math
import os.path
import pickle
from typing import Tuple

import msgpack # type: ignore
import requests
from msgpack.exceptions import ExtraData # type: ignore
from msgpack.exceptions import FormatError # type: ignore
from msgpack.exceptions import StackError # type: ignore

import ait.core
from ait.core import log
Expand Down Expand Up @@ -316,10 +319,22 @@ def _load_leap_second_data(self):
try:
log.info("Attempting to load leapseconds.dat")
with open(ls_file, "rb") as outfile:
self._data = pickle.load(outfile)
log.info("Loaded leapseconds config file successfully")
packed_data = outfile.read()

unpacked_data = msgpack.unpackb(packed_data, object_hook=mp_decode_datetime)

# msgpack converts tuples to lists, so have to convert back
if unpacked_data and "leapseconds" in unpacked_data:
lst_list = unpacked_data["leapseconds"]
tup_list = [tuple(lst) for lst in lst_list]
unpacked_data["leapseconds"] = tup_list
self._data = unpacked_data
log.info("Loaded leapseconds config file successfully")

except IOError:
log.info("Unable to locate leapseconds config file")
except (ValueError, ExtraData, FormatError, StackError):
log.info("Unable to load leapseconds.dat")

if not (self._data and self.is_valid()):
try:
Expand Down Expand Up @@ -391,11 +406,23 @@ def _update_leap_second_data(self):
log.info("Leapsecond data processed")

self._data = data
packed_data = msgpack.packb(data, default=mp_encode_datetime)
with open(ls_file, "wb") as outfile:
pickle.dump(data, outfile)

outfile.write(packed_data)
log.info("Successfully generated leapseconds config file")


def mp_decode_datetime(obj):
    """Msgpack object_hook: restore datetimes encoded by mp_encode_datetime.

    Dicts carrying the "__datetime__" marker are parsed back into
    datetime.datetime objects; any other dict is returned untouched.
    """
    if "__datetime__" not in obj:
        return obj
    return datetime.datetime.strptime(obj["as_str"], "%Y%m%dT%H:%M:%S.%f")


def mp_encode_datetime(obj):
    """Msgpack default hook: serialize datetimes as tagged string dicts.

    A datetime.datetime becomes ``{"__datetime__": True, "as_str": ...}``
    (round-tripped by mp_decode_datetime); everything else passes through.
    """
    if not isinstance(obj, datetime.datetime):
        return obj
    return {"__datetime__": True, "as_str": obj.strftime("%Y%m%dT%H:%M:%S.%f")}


# Module-level singleton: instantiate UTCLeapSeconds once on first import.
# NOTE(review): assumes LeapSeconds was initialized to a falsy value (e.g.
# None) earlier in this module — confirm against the full file.
if not LeapSeconds:
    LeapSeconds = UTCLeapSeconds()
20 changes: 1 addition & 19 deletions ait/core/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@
import datetime
import hashlib
import io
import os
import pickle

import yaml

Expand Down Expand Up @@ -457,27 +455,11 @@ def __init__(self, filename=None):
filename = ait.config.get("table.filename")

self.filename = filename
self.cachename = os.path.splitext(filename)[0] + ".pkl"
self.fswtabdict = None

@property
def dirty(self):
"""True if the pickle cache needs to be regenerated, False to
use current pickle binary"""
return util.check_yaml_timestamps(self.filename, self.cachename)

def load(self):
if self.fswtabdict is None:
if self.dirty:
self.fswtabdict = FSWTabDict(self.filename)
util.update_cache(self.filename, self.cachename, self.fswtabdict)
log.info(f"Loaded new pickle file: {self.cachename}")
else:
with open(self.cachename, "rb") as stream:
self.fswtabdict = pickle.load(stream)
log.info(
"Current pickle file loaded: " f'{self.cachename.split("/")[-1]}'
)
self.fswtabdict = FSWTabDict(self.filename)

return self.fswtabdict

Expand Down
2 changes: 1 addition & 1 deletion ait/core/tlm.py
Original file line number Diff line number Diff line change
Expand Up @@ -744,7 +744,7 @@ class PacketExpression:
"""PacketExpression
A Packet Expression is a simple mathematical expression that can
be evaluted in the context of a Packet. Names in the formula
be evaluated in the context of a Packet. Names in the formula
refer to fields in the packet.
Packet Expressions provide a convenient mechanism to express and
Expand Down
103 changes: 29 additions & 74 deletions ait/core/util.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#!/usr/bin/env python2.7
# Advanced Multi-Mission Operations System (AMMOS) Instrument Toolkit (AIT)
# Bespoke Link to Instruments and Small Satellites (BLISS)
#
Expand Down Expand Up @@ -36,30 +35,14 @@ def __init__(self, filename, loader):
"""
Creates a new ObjectCache
Caches the Python object returned by loader(filename), using
Python's pickle object serialization mechanism. An ObjectCache
is useful when loader(filename) is slow.
Caches the Python object returned by loader(filename).
An ObjectCache is useful when loader(filename) is slow.
The result of loader(filename) is cached to cachename, the
basename of filename with a '.pkl' extension.
Use the load() method to load, either via loader(filename) or
the pickled cache file, whichever was modified most recently.
Use the load() method to load
"""
self._loader = loader
self._dict = None
self._filename = filename
self._cachename = os.path.splitext(filename)[0] + ".pkl"

@property
def cachename(self):
"""The pickled cache filename"""
return self._cachename

@property
def dirty(self):
"""True if the pickle cache needs to be regenerated, False to use current pickle binary"""
return check_yaml_timestamps(self.filename, self.cachename)

@property
def filename(self):
Expand All @@ -70,19 +53,11 @@ def load(self):
"""
Loads the Python object
Loads the Python object, either via loader (filename) or the
pickled cache file, whichever was modified most recently.
Loads the Python object via loader (filename).
"""

if self._dict is None:
if self.dirty:
self._dict = self._loader(self.filename)
update_cache(self.filename, self.cachename, self._dict)
log.info(f"Loaded new pickle file: {self.cachename}")
else:
with open(self.cachename, "rb") as stream:
self._dict = pickle.load(stream)
log.info(f'Current pickle file loaded: {self.cachename.split("/")[-1]}')
self._dict = self._loader(self.filename)
return self._dict


Expand All @@ -94,45 +69,46 @@ def load(self):
timer = time.time


def check_yaml_timestamps(yaml_file_name, cache_name):
def check_yaml_timestamps(yaml_file_name, cache_file_name):
"""
Checks YAML configuration file timestamp and any 'included' YAML configuration file's
timestamp against the pickle cache file timestamp.
The term 'dirty' means that a yaml config file has a more recent timestamp than the
pickle cache file. If a pickle cache file is found to be 'dirty' (return true) the
pickle cache file is not up-to-date, and a new pickle cache file must be generated.
If the cache file in not 'dirty' (return false) the existing pickle binary will
be loaded.
Checks YAML configuration file timestamp and any 'included' YAML
configuration file's timestamp against the cache file's timestamp.
The term 'dirty' means that a yaml config file has a more recent
timestamp than the cache file. If file is found to be 'dirty'
(return True) the cache file can be considered not up-to-date.
If the file is not 'dirty' (return False) the cache file can be
considered up-to-date.
param: yaml_file_name: str
Name of the yaml configuration file to be tested
param: cache_name: str
Filename with path to the cached pickle file for this config file.
param: cache_file_name: str
Filename with path to the cache file to be compared
return: boolean
True:
Indicates 'dirty' pickle cache: i.e. the file is not current, generate new binary
The cache file is not current, or does not exist
False
Load current cache file
The cache file can be considered current
"""
# If no pickle cache exists return True to make a new one.
if not os.path.exists(cache_name):
log.debug("No pickle cache exists, make a new one")
# If no cache exists return True to make a new one.
if not os.path.exists(cache_file_name):
log.debug("No cache exists, make a new one")
return True
# Has the yaml config file has been modified since the creation of the pickle cache
if os.path.getmtime(yaml_file_name) > os.path.getmtime(cache_name):
log.info(f"{yaml_file_name} modified - make a new binary pickle cache file.")
# Has the yaml config file been modified since the creation of the cache
if os.path.getmtime(yaml_file_name) > os.path.getmtime(cache_file_name):
log.info(f"{yaml_file_name} modified - make a new cache file.")
return True
# Get the directory of the yaml config file to be parsed
dir_name = os.path.dirname(yaml_file_name)
# Open the yaml config file to look for '!includes' to be tested on the next iteration
# Open the yaml config file to look for '!includes' to be tested
# on the next iteration
with open(yaml_file_name, "r") as file:
try:
for line in file:
if not line.strip().startswith("#") and "!include" in line:
check = check_yaml_timestamps(
os.path.join(dir_name, line.strip().split(" ")[2]), cache_name
os.path.join(dir_name, line.strip().split(" ")[2]), cache_file_name
)
if check:
return True
Expand All @@ -144,27 +120,6 @@ def check_yaml_timestamps(yaml_file_name, cache_name):
return False


def update_cache(yaml_file_name, cache_file_name, object_to_serialize):
    """
    Serializes object_to_serialize to the pickle cache file cache_file_name.

    Intended to be called when the yaml config file has been modified more
    recently than the existing cache, i.e. when check_yaml_timestamps()
    reported the cache as 'dirty'.

    param: yaml_file_name: str
        Name of the yaml configuration file whose loaded result is being
        cached (used only in the log message here)
    param: cache_file_name: str
        File name with path of the pickle cache file to (re)write
    param: object_to_serialize: object
        Object to pickle, e.g. an instance of 'ait.core.cmd.CmdDict'

    NOTE(review): pickle output is unsafe to load from untrusted sources;
    this function is removed by the surrounding commit as part of the
    migration away from pickle-based caching.
    """

    msg = f"Saving updates from more recent {yaml_file_name} to {cache_file_name}."
    log.info(msg)
    with open(cache_file_name, "wb") as output:
        pickle.dump(object_to_serialize, output, -1)


def __init_extensions__(modname, modsyms): # noqa
"""
Initializes a module (given its name and :func:`globals()` symbol
Expand Down Expand Up @@ -290,11 +245,11 @@ def setDictDefaults(d, defaults): # noqa

def getDefaultDict(modname, config_key, loader, reload=False, filename=None): # noqa
"""
Returns default AIT dictonary for modname
Returns default AIT dictionary for modname
This helper function encapsulates the core logic necessary to
(re)load, cache (via util.ObjectCache), and return the default
dictionary. For example, in ait.core.cmd:
(re)load and return the default dictionary.
For example, in ait.core.cmd:
def getDefaultDict(reload=False):
return ait.util.getDefaultDict(__name__, 'cmddict', CmdDict, reload)
Expand Down
Binary file modified config/leapseconds.dat
Binary file not shown.
Loading

0 comments on commit dc9280b

Please sign in to comment.