From 9b0aaeb4771fd1cae2ccefd211848b7b810b4772 Mon Sep 17 00:00:00 2001 From: ulli Date: Mon, 13 Aug 2018 10:28:43 -0400 Subject: [PATCH 1/2] Ignore vim swap files. --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 20e35aa8..87e01a92 100644 --- a/.gitignore +++ b/.gitignore @@ -69,6 +69,9 @@ docs/_build/ *_minted-* *.pdf +# vim artifacts +*.swp + # specific directories examples/malawi/resources/ examples/satellites/splits/ From f5c51f949c6a1a4773d593c98483cd43b19e3f35 Mon Sep 17 00:00:00 2001 From: ulli Date: Mon, 13 Aug 2018 10:29:26 -0400 Subject: [PATCH 2/2] Patch loomcat to take in an optional mapping between col # and col name. --- src/crosscat/loomcat.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/crosscat/loomcat.py b/src/crosscat/loomcat.py index 64abd683..66f8cfd8 100644 --- a/src/crosscat/loomcat.py +++ b/src/crosscat/loomcat.py @@ -101,16 +101,20 @@ def _retrieve_column_partition(path, sample): ])) -def _retrieve_featureid_to_cgpm(path): +def _retrieve_featureid_to_cgpm(path, colname_colno_mapping=None): """Returns a dict mapping loom's 0-based featureid to cgpm.outputs.""" # Loom orders features alphabetically based on statistical types: # i.e. 'bb' < 'dd' < 'nich'. The ordering is stored in # `ingest/encoding.json.gz`. encoding_in = os.path.join(path, 'ingest', 'encoding.json.gz') features = json_load(encoding_in) - def colname_to_output(cname): - # Convert dummy column name from 'c00012' to the integer 12. - return int(cname.replace('c', '')) + if colname_colno_mapping is not None: + def colname_to_output(cname): + return colname_colno_mapping[cname] + else: + def colname_to_output(cname): + # Convert dummy column name from 'c00012' to the integer 12. 
+ return int(cname.replace('c', '')) return { i: colname_to_output(f['name']) for i, f in enumerate(features) } @@ -136,7 +140,7 @@ def _retrieve_row_partitions(path, sample): } -def _update_state(state, path, sample): +def _update_state(state, path, sample, colname_colno_mapping=None): """Updates `state` to match the CrossCat `sample` at `path`. Only the row and column partitions are updated; parameter inference @@ -145,15 +149,14 @@ def _update_state(state, path, sample): Wild errors will occur if the Loom object is incompatible with `state`. """ - # Retrieve the new column partition from loom. Zv_new_raw = _retrieve_column_partition(path, sample) assert sorted(Zv_new_raw.keys()) == range(len(state.outputs)) - # The keys of Zv are contiguous # from [0..len(outputs)], while state.outputs are arbitrary integers, so we # need to map the loom feature ids correctly. - output_mapping = _retrieve_featureid_to_cgpm(path) + output_mapping = _retrieve_featureid_to_cgpm(path, colname_colno_mapping) + assert sorted(output_mapping.values()) == sorted(state.outputs) Zv_new = {output_mapping[f]: Zv_new_raw[f] for f in Zv_new_raw}