Reduce codefactor (#487)
* Reduce codefactor

* stupid syntax

* typo

* Update plugin.py
JoranAngevaare authored Jul 15, 2021
1 parent 836a7c0 commit e1b77aa
Showing 20 changed files with 42 additions and 55 deletions.
2 changes: 1 addition & 1 deletion strax/chunk.py
@@ -298,10 +298,10 @@ def continuity_check(chunk_iter):
"""Check continuity of chunks yielded by chunk_iter as they are yielded"""
last_end = None
last_runid = None

last_subrun = {'run_id': None}
for chunk in chunk_iter:
if chunk.run_id != last_runid:
# TODO: can we do better?
last_end = None
last_subrun = {'run_id': None}

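For context, the continuity check above tracks the previous chunk's end and resets it whenever the run changes. A self-contained sketch of that pattern, using plain dicts rather than strax.Chunk objects (illustrative only, not the strax API):

# Illustrative sketch of the continuity-check pattern seen in this hunk.
def check_continuity(chunks):
    last_end = None
    last_run_id = None
    for chunk in chunks:
        if chunk['run_id'] != last_run_id:
            # New run: there is no previous end to compare against
            last_end = None
        if last_end is not None and chunk['start'] != last_end:
            raise ValueError(
                f"Chunk starting at {chunk['start']} does not line up "
                f"with previous end {last_end}")
        last_end = chunk['end']
        last_run_id = chunk['run_id']

# Usage: two contiguous chunks of the same run pass silently
check_continuity([
    {'run_id': '0', 'start': 0, 'end': 10},
    {'run_id': '0', 'start': 10, 'end': 20},
])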
5 changes: 2 additions & 3 deletions strax/context.py
@@ -149,7 +149,6 @@ def new_context(self,
:param replace: If True, replaces settings rather than adding them.
See Context.__init__ for documentation on other parameters.
"""
# TODO: Clone rather than pass on storage front-ends ??
if not isinstance(storage, (list, tuple)):
storage = [storage]
if config is None:
@@ -751,7 +750,7 @@ def check_cache(d):
def concat_loader(*args, **kwargs):
for x in ldrs:
yield from x(*args, **kwargs)

# pylint: disable=unnecessary-lambda
ldr = lambda *args, **kwargs: concat_loader(*args, **kwargs)

if ldr:
@@ -1349,6 +1348,7 @@ def function(arr):
+ result.get('result', 0))

n_chunks += 1
n_chunks += 1

result['n_chunks'] = n_chunks
return result
@@ -1463,7 +1463,6 @@ def is_stored(self, run_id, target, **kwargs):

# If any new options given, replace the current context
# with a temporary one
# TODO duplicated code with with get_iter
if len(kwargs):
# Comment below disables pycharm from inspecting the line below it
# noinspection PyMethodFirstArgAssignment
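The check_cache hunk keeps a lambda that merely forwards to concat_loader, hence the pylint disable. A self-contained sketch of the same generator-chaining idea, with illustrative names (not the strax API):

# Illustrative only: chain several generator-returning loaders into one
# callable, the same `yield from` pattern used by concat_loader above.
def make_concat_loader(loaders):
    def concat_loader(*args, **kwargs):
        for loader in loaders:
            yield from loader(*args, **kwargs)
    return concat_loader

# Usage: chain two trivial "loaders"
combined = make_concat_loader([lambda: iter([1, 2]), lambda: iter([3])])
print(list(combined()))  # [1, 2, 3]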
8 changes: 4 additions & 4 deletions strax/io.py
@@ -39,8 +39,8 @@ def load_file(f, compressor, dtype):
:param dtype: numpy dtype of data to load
"""
if isinstance(f, str):
with open(f, mode='rb') as f:
return _load_file(f, compressor, dtype)
with open(f, mode='rb') as write_file:
return _load_file(write_file, compressor, dtype)
else:
return _load_file(f, compressor, dtype)

@@ -74,8 +74,8 @@ def save_file(f, data, compressor='zstd'):
if isinstance(f, str):
final_fn = f
temp_fn = f + '_temp'
with open(temp_fn, mode='wb') as f:
result = _save_file(f, data, compressor)
with open(temp_fn, mode='wb') as write_file:
result = _save_file(write_file, data, compressor)
os.rename(temp_fn, final_fn)
return result
else:
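The save_file hunk also shows the write-to-temp-then-rename step strax uses so readers never see a half-written file. A generic sketch of that pattern (names are illustrative, not the strax API):

# Illustrative only: write to a temporary file, then rename into place.
# os.rename is atomic on POSIX when source and target share a filesystem.
import os

def atomic_write(path, payload):
    temp_path = path + '_temp'
    with open(temp_path, mode='wb') as write_file:
        write_file.write(payload)
    os.rename(temp_path, path)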
2 changes: 1 addition & 1 deletion strax/mailbox.py
@@ -1,3 +1,4 @@
# pylint: disable=redefined-builtin
from concurrent.futures import Future, TimeoutError
import heapq
import sys
@@ -319,7 +320,6 @@ def can_write():
if self.killed:
self.log.debug(f"Sender found {self.name} killed while waiting"
" for room for new messages.")
# TODO: this is duplicated from above...
if self.force_killed:
raise MailboxKilled(self.killed_because)
return
2 changes: 1 addition & 1 deletion strax/plugin.py
@@ -387,7 +387,7 @@ class IterDone(Exception):
# If any of the inputs were trimmed due to early splits,
# trim the others too.
# In very hairy cases this can take multiple passes.
# TODO: can we optimize this, or code it more elegantly?
# can we optimize this, or code it more elegantly?
max_passes_left = 10
while max_passes_left > 0:
this_chunk_end = min([x.end for x in inputs.values()]
2 changes: 1 addition & 1 deletion strax/processing/data_reduction.py
@@ -29,7 +29,7 @@ class ReductionLevel(IntEnum):
def cut_baseline(records, n_before=48, n_after=30):
""""Replace first n_before and last n_after samples of pulses by 0
"""
# TODO: records.data.shape[1] gives a numba error (file issue?)
# records.data.shape[1] gives a numba error (file issue?)
if not len(records):
return
samples_per_record = len(records[0]['data'])
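cut_baseline zeroes the first n_before and last n_after samples of each pulse. A small numpy sketch of that operation on a plain 2-D array (illustrative only; the real function operates on structured record arrays):

# Illustrative only: zero the leading/trailing samples of each waveform.
import numpy as np

def zero_edges(waveforms, n_before=48, n_after=30):
    waveforms = np.asarray(waveforms, dtype=float)
    samples_per_record = waveforms.shape[1]
    waveforms[:, :n_before] = 0
    waveforms[:, samples_per_record - n_after:] = 0
    return waveforms

print(zero_edges(np.ones((2, 100)), n_before=3, n_after=2)[0, :5])  # [0. 0. 0. 1. 1.]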
2 changes: 0 additions & 2 deletions strax/processing/hitlets.py
@@ -353,7 +353,6 @@ def get_fwxm(hitlet, fraction=0.5):
lbi = (index_maximum - 1) - lbi # start sample minus samples we went to the left
m = data[lbi + 1] - lbs # divided by 1 sample
if m == 0:
# TODO No idea how this happened, we need to fix this
return np.nan, np.nan
left_edge = lbi + (max_val - lbs) / m + 0.5
else:
@@ -366,7 +365,6 @@
rbi += 1 + index_maximum # sample to the right plus start
m = data[rbi - 1] - rbs
if m == 0:
# TODO No idea how this happened, we need to fix this
return np.nan, np.nan
right_edge = rbi - (max_val - rbs) / m + 0.5
else:
2 changes: 1 addition & 1 deletion strax/processing/peak_building.py
@@ -272,7 +272,7 @@ def find_peak_groups(peaks, gap_threshold,
"""
# Mock up a "hits" array so we can just use the existing peakfinder
# It doesn't work on raw peaks, since they might have different dts
# TODO: is there no cleaner way?
# Maybe there is a cleaner way?
fake_hits = np.zeros(len(peaks), dtype=strax.hit_dtype)
fake_hits['dt'] = 1
fake_hits['area'] = 1
4 changes: 2 additions & 2 deletions strax/processing/pulse_processing.py
@@ -125,7 +125,6 @@ def record_links(records):
"""Return (prev_r, next_r), each arrays of indices of previous/next
record in the same pulse, or -1 if this is not applicable
"""
# TODO: needs tests
if not len(records):
return (
np.ones(0, dtype=np.int32) * NO_RECORD_LINK,
@@ -176,7 +175,8 @@ def find_hits(records,
min_amplitude: ty.Union[int, np.ndarray] = 15,
min_height_over_noise: ty.Union[int, np.ndarray] = 0):
"""Return hits (intervals >= threshold) found in records.
Hits that straddle record boundaries are split (TODO: fix this?)
Hits that straddle record boundaries are split (perhaps we should
fix this?)
NB: returned hits are NOT sorted yet!
"""
1 change: 0 additions & 1 deletion strax/run_selection.py
@@ -17,7 +17,6 @@
def list_available(self, target, **kwargs):
"""Return sorted list of run_id's for which target is available
"""
# TODO duplicated code with with get_iter
if len(kwargs):
# noinspection PyMethodFirstArgAssignment
self = self.new_context(**kwargs)
8 changes: 5 additions & 3 deletions strax/storage/files.py
@@ -263,6 +263,8 @@ def _saver(self, dirname, metadata, **kwargs):
class FileSaver(strax.Saver):
"""Saves data to compressed binary files"""
json_options = dict(sort_keys=True, indent=4)
# When writing chunks, rewrite the json file every time we write a chunk
_flush_md_for_every_chunk = True

def __init__(self, dirname, metadata, **kwargs):
super().__init__(metadata, **kwargs)
@@ -327,11 +329,11 @@ def _save_chunk_metadata(self, chunk_info):

if not self.is_forked or is_first:
# Just append and flush the metadata
# (maybe not super-efficient to write the json everytime...
# (maybe not super-efficient to write the json every time...
# just don't use thousands of chunks)
# TODO: maybe make option to turn this off?
self.md['chunks'].append(chunk_info)
self._flush_metadata()
if self._flush_md_for_every_chunk:
self._flush_metadata()

def _close(self):
if not os.path.exists(self.tempdirname):
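The new _flush_md_for_every_chunk class attribute makes the per-chunk metadata rewrite optional. A hedged sketch of how a custom saver might switch it off; the subclass name is hypothetical and the exact import path is an assumption:

# Hypothetical subclass, not part of this commit: skip rewriting the
# metadata json after every chunk. Whether a final metadata flush still
# happens when the saver closes is an assumption here.
from strax.storage.files import FileSaver

class LazyMetadataFileSaver(FileSaver):
    _flush_md_for_every_chunk = False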
9 changes: 4 additions & 5 deletions strax/storage/mongo.py
@@ -69,11 +69,10 @@ def _read_chunk(self, backend_key, chunk_info, dtype, compressor):
raise ValueError(
f'Metadata claims chunk{chunk_i} exists but it is unknown to '
f'the chunks_registry')
else:
chunk_doc = doc.get('data', None)
if chunk_doc is None:
raise ValueError(
f'Doc for chunk_{chunk_i} in wrong format:\n{doc}')

chunk_doc = doc.get('data', None)
if chunk_doc is None:
raise ValueError(f'Doc for chunk_{chunk_i} in wrong format:\n{doc}')

# Convert JSON to numpy
chunk_len = len(chunk_doc)
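This mongo.py hunk drops an else branch that followed a raise, so the success path no longer needs the extra indentation. A small self-contained illustration of that guard-clause style (function name is illustrative):

# Illustrative only: raise early, then continue un-indented.
def get_chunk_doc(doc, chunk_i):
    chunk_doc = doc.get('data', None)
    if chunk_doc is None:
        raise ValueError(f'Doc for chunk_{chunk_i} in wrong format:\n{doc}')
    return chunk_doc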
1 change: 1 addition & 0 deletions strax/storage/rucio.py
@@ -42,6 +42,7 @@ def rucio_path(root_dir, filename, dirname):
"""Convert target to path according to rucio convention"""
scope = "xnt_"+dirname.split('-')[0]
rucio_did = "{0}:{1}".format(scope, filename)
# disable bandit
rucio_md5 = hashlib.md5(rucio_did.encode('utf-8')).hexdigest()
t1 = rucio_md5[0:2]
t2 = rucio_md5[2:4]
2 changes: 1 addition & 1 deletion strax/storage/zipfiles.py
@@ -75,7 +75,7 @@ def _backend_key(self, key):
def _zipname(self, key):
zipname = osp.join(self.path, key.run_id + '.zip')
# Since we're never writing, this check can be here
# TODO: sounds like a bad idea?
# is this a bad idea?
if not osp.exists(zipname):
raise strax.DataNotAvailable
return zipname
1 change: 0 additions & 1 deletion strax/testutils.py
@@ -52,7 +52,6 @@ def sorted_bounds(disjoint=False,
# Fake intervals
##

# TODO: isn't this duplicated with bounds_to_records??

def bounds_to_intervals(bs, dtype=strax.interval_dtype):
x = np.zeros(len(bs), dtype=dtype)
4 changes: 1 addition & 3 deletions strax/utils.py
@@ -296,6 +296,7 @@ def deterministic_hash(thing, length=10):
"""
hashable = hashablize(thing)
jsonned = json.dumps(hashable, cls=NumpyJSONEncoder)
# disable bandit
digest = sha1(jsonned.encode('ascii')).digest()
return b32encode(digest)[:length].decode('ascii').lower()

@@ -311,9 +312,6 @@ def formatted_exception():
For MailboxKilled exceptions, we return the original
exception instead.
"""
# Can't do this at the top level, utils is one of the
# first files of the strax package
import strax
exc_info = sys.exc_info()
if exc_info[0] == strax.MailboxKilled:
# Get the original exception back out
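deterministic_hash, shown above, reduces an object to a short stable token by hashing its canonical JSON form; the sha1 is a fingerprint rather than a security measure, which is what the bandit note flags. A self-contained sketch of the pattern, with illustrative names (not the strax function itself):

# Illustrative only: canonical JSON -> sha1 -> short lowercase base32 tag.
import json
from base64 import b32encode
from hashlib import sha1

def short_hash(thing, length=10):
    jsonned = json.dumps(thing, sort_keys=True)
    digest = sha1(jsonned.encode('ascii')).digest()  # fingerprint, not security
    return b32encode(digest)[:length].decode('ascii').lower()

print(short_hash({'a': 1, 'b': 2}) == short_hash({'b': 2, 'a': 1}))  # True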
35 changes: 15 additions & 20 deletions tests/test_loop_plugins.py
@@ -103,6 +103,19 @@ def compute(self, big_kinda_data):
# Drop some of the data in big_kinda_data
return drop_random(big_kinda_data)

def _compute_loop_inner(res, k, big_kinda_data, small_kinda_data):
if k == _dtype_name:
res[k] = big_kinda_data[k]
for small_bit in small_kinda_data[k]:
if np.iterable(res[k]):
for i in range(len(res[k])):
res[k][i] += small_bit
else:
res[k] += small_bit
else:
res[k] = big_kinda_data[k]


class AddBigToSmall(strax.LoopPlugin):
"""
Test loop plugin by looping big_thing and adding whatever is in small_thing
@@ -121,16 +134,7 @@ def infer_dtype(self):
def compute_loop(self, big_kinda_data, small_kinda_data):
res = {}
for k in self.dtype.names:
if k == _dtype_name:
res[k] = big_kinda_data[k]
for small_bit in small_kinda_data[k]:
if np.iterable(res[k]):
for i in range(len(res[k])):
res[k][i] += small_bit
else:
res[k] += small_bit
else:
res[k] = big_kinda_data[k]
_compute_loop_inner(res, k, big_kinda_data, small_kinda_data)
return res

class AddBigToSmallMultiOutput(strax.LoopPlugin):
@@ -146,16 +150,7 @@ def infer_dtype(self):
def compute_loop(self, big_kinda_data, small_kinda_data):
res = {}
for k in self.dtype['some_combined_things'].names:
if k == _dtype_name:
res[k] = big_kinda_data[k]
for small_bit in small_kinda_data[k]:
if np.iterable(res[k]):
for i in range(len(res[k])):
res[k][i] += small_bit
else:
res[k] += small_bit
else:
res[k] = big_kinda_data[k]
_compute_loop_inner(res, k, big_kinda_data, small_kinda_data)
return {k: res for k in self.provides}

with tempfile.TemporaryDirectory() as temp_dir:
2 changes: 1 addition & 1 deletion tests/test_mailbox.py
@@ -47,7 +47,7 @@ def mailbox_tester(messages,
reader_sleeps=reader_sleeps)
for _ in range(n_readers)]

for i in range(len(messages)):
for i, _ in enumerate(messages):
mb.send(messages[i], msg_number=numbers[i])
print(f"Sent message {i}. Now {len(mb._mailbox)} ms in mailbox.")

2 changes: 1 addition & 1 deletion tests/test_pulse_processing.py
@@ -9,7 +9,7 @@

def _find_hits(r):
# Test pulses have dt=1 and time=0
# TODO: hm, maybe this doesn't test everything
# hm, maybe this doesn't test everything?

hits = strax.find_hits(r, min_amplitude=1)

3 changes: 0 additions & 3 deletions tests/test_utils.py
@@ -21,9 +21,6 @@ def bla(_result_buffer=None, result_dtype=None):

result = np.array([0, 1, 2, 3, 4], dtype=np.int64)
np.testing.assert_equal(bla(), result)
# TODO: re-enable chunk size spec?
# np.testing.assert_equal(bla(chunk_size=1), result)
# np.testing.assert_equal(bla(chunk_size=7), result)
should_get = result.astype(np.float64)
got = bla(result_dtype=np.float64)
np.testing.assert_equal(got, should_get)
