From 3ce16f6a79e8b7f0221d2c743013de2676fab5f8 Mon Sep 17 00:00:00 2001 From: Chris Brozdowski Date: Tue, 7 Nov 2023 16:31:38 -0800 Subject: [PATCH] Key length solve: Reduce varchars (#664) * Contrib doc edits * #531 * #532: PR Template * Fix numbering * Typo * #658 config. Start to phase out config dir * Update changelog * Add docstrings. Separate sql content from funcs * #630 - Reduce varchar approach * More varchar edits * Fix typo. Note all Reduce Varchar changes * Fix linting * Blackify * Update changelog, citation * typo * Remove unwanted import --- CHANGELOG.md | 3 ++- CITATION.cff | 4 +-- src/spyglass/common/common_interval.py | 4 ++- src/spyglass/common/common_nwbfile.py | 8 ++++-- src/spyglass/common/common_position.py | 4 ++- src/spyglass/decoding/__init__.py | 1 - src/spyglass/decoding/clusterless.py | 27 +++++++++++-------- src/spyglass/figurl_views/SpikeSortingView.py | 2 +- src/spyglass/lfp/v1/lfp_artifact.py | 10 ++++--- src/spyglass/lock/file_lock.py | 2 ++ src/spyglass/position/position_merge.py | 4 ++- .../position_linearization/v1/__init__.py | 2 +- .../spikesorting/spikesorting_artifact.py | 4 ++- .../spikesorting/spikesorting_curation.py | 9 +++++-- .../spikesorting/spikesorting_recording.py | 10 ++++--- .../spikesorting/spikesorting_sorting.py | 17 +++++++----- src/spyglass/utils/dj_merge_tables.py | 16 +++++++++-- 17 files changed, 86 insertions(+), 41 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ce2c5d367..1c682248d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,11 +1,12 @@ # Change Log -## [Unreleased] +## [0.4.3] (November 7, 2023) - Migrate `config` helper scripts to Spyglass codebase. #662 - Revise contribution guidelines. #655 - Minor bug fixes. #656, #657, #659, #651, #671 - Add setup instruction specificity. +- Reduce primary key varchar allocation aross may tables. #664 ## [0.4.2] (October 10, 2023) diff --git a/CITATION.cff b/CITATION.cff index 6ddc71ed9..fa138ea8d 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -10,6 +10,6 @@ authors: - family-names: "Frank" given-names: "Loren" title: "spyglass" -version: 0.4.2 -date-released: 2023-10-10 +version: 0.4.3 +date-released: 2023-11-07 url: "https://github.com/LorenFrankLab/spyglass" diff --git a/src/spyglass/common/common_interval.py b/src/spyglass/common/common_interval.py index 07630c494..99c9a5bdf 100644 --- a/src/spyglass/common/common_interval.py +++ b/src/spyglass/common/common_interval.py @@ -18,11 +18,13 @@ class IntervalList(dj.Manual): definition = """ # Time intervals used for analysis -> Session - interval_list_name: varchar(200) # descriptive name of this interval list + interval_list_name: varchar(170) # descriptive name of this interval list --- valid_times: longblob # numpy array with start/end times for each interval """ + # See #630, #664. Excessive key length. + @classmethod def insert_from_nwbfile(cls, nwbf, *, nwb_file_name): """Add each entry in the NWB file epochs table to the IntervalList. diff --git a/src/spyglass/common/common_nwbfile.py b/src/spyglass/common/common_nwbfile.py index 4458a1162..8ee62c834 100644 --- a/src/spyglass/common/common_nwbfile.py +++ b/src/spyglass/common/common_nwbfile.py @@ -42,7 +42,7 @@ class Nwbfile(dj.Manual): definition = """ # Table for holding the NWB files. - nwb_file_name: varchar(255) # name of the NWB file + nwb_file_name: varchar(64) # name of the NWB file --- nwb_file_abs_path: filepath@raw INDEX (nwb_file_abs_path) @@ -50,6 +50,8 @@ class Nwbfile(dj.Manual): # NOTE the INDEX above is implicit from filepath@... above but needs to be explicit # so that alter() can work + # NOTE: See #630, #664. Excessive key length. + @classmethod def insert_from_relative_file_name(cls, nwb_file_name): """Insert a new session from an existing NWB file. @@ -149,7 +151,7 @@ def cleanup(delete_files=False): class AnalysisNwbfile(dj.Manual): definition = """ # Table for holding the NWB files that contain results of analysis, such as spike sorting. - analysis_file_name: varchar(255) # name of the file + analysis_file_name: varchar(64) # name of the file --- -> Nwbfile # name of the parent NWB file. Used for naming and metadata copy analysis_file_abs_path: filepath@analysis # the full path to the file @@ -161,6 +163,8 @@ class AnalysisNwbfile(dj.Manual): # NOTE the INDEX above is implicit from filepath@... above but needs to be explicit # so that alter() can work + # See #630, #664. Excessive key length. + def create(self, nwb_file_name): """Open the NWB file, create a copy, write the copy to disk and return the name of the new file. diff --git a/src/spyglass/common/common_position.py b/src/spyglass/common/common_position.py index 259958181..3ebab0403 100644 --- a/src/spyglass/common/common_position.py +++ b/src/spyglass/common/common_position.py @@ -39,7 +39,7 @@ class PositionInfoParameters(dj.Lookup): """ definition = """ - position_info_param_name : varchar(80) # name for this set of parameters + position_info_param_name : varchar(32) # name for this set of parameters --- max_separation = 9.0 : float # max distance (in cm) between head LEDs max_speed = 300.0 : float # max speed (in cm / s) of animal @@ -53,6 +53,8 @@ class PositionInfoParameters(dj.Lookup): # pandas.DataFrame.interpolation for list of methods """ + # See #630, #664. Excessive key length. + @schema class IntervalPositionInfoSelection(dj.Lookup): diff --git a/src/spyglass/decoding/__init__.py b/src/spyglass/decoding/__init__.py index 735488e9e..3c5f7381d 100644 --- a/src/spyglass/decoding/__init__.py +++ b/src/spyglass/decoding/__init__.py @@ -3,7 +3,6 @@ ClusterlessClassifierParameters, MarkParameters, MultiunitFiringRate, - MultiunitHighSynchronyEvents, MultiunitHighSynchronyEventsParameters, UnitMarkParameters, UnitMarks, diff --git a/src/spyglass/decoding/clusterless.py b/src/spyglass/decoding/clusterless.py index 86f375f19..b530786c4 100644 --- a/src/spyglass/decoding/clusterless.py +++ b/src/spyglass/decoding/clusterless.py @@ -32,18 +32,18 @@ from replay_trajectory_classification.initial_conditions import ( UniformInitialConditions, ) - from ripple_detection import ( get_multiunit_population_firing_rate, multiunit_HSE_detector, ) -from spyglass.common.common_interval import IntervalList -from spyglass.common.common_nwbfile import AnalysisNwbfile -from spyglass.common.common_position import IntervalPositionInfo -from spyglass.utils.dj_helper_fn import fetch_nwb +from tqdm.auto import tqdm + from spyglass.common.common_behav import ( convert_epoch_interval_name_to_position_interval_name, ) +from spyglass.common.common_interval import IntervalList +from spyglass.common.common_nwbfile import AnalysisNwbfile +from spyglass.common.common_position import IntervalPositionInfo from spyglass.decoding.core import ( convert_valid_times_to_slice, get_valid_ephys_position_times_by_epoch, @@ -61,7 +61,7 @@ SpikeSorting, SpikeSortingSelection, ) -from tqdm.auto import tqdm +from spyglass.utils.dj_helper_fn import fetch_nwb schema = dj.schema("decoding_clusterless") @@ -72,13 +72,15 @@ class MarkParameters(dj.Manual): time.""" definition = """ - mark_param_name : varchar(80) # a name for this set of parameters + mark_param_name : varchar(32) # a name for this set of parameters --- # the type of mark. Currently only 'amplitude' is supported mark_type = 'amplitude': varchar(40) mark_param_dict: BLOB # dictionary of parameters for the mark extraction function """ + # NOTE: See #630, #664. Excessive key length. + def insert_default(self): """Insert the default parameter set @@ -665,18 +667,20 @@ def insert_default(self): ) -@schema +""" +NOTE: Table decommissioned. See #630, #664. Excessive key length. + class MultiunitHighSynchronyEvents(dj.Computed): - """Finds times of high mulitunit activity during immobility.""" + "Finds times of high mulitunit activity during immobility." - definition = """ + definition = " -> MultiunitHighSynchronyEventsParameters -> UnitMarksIndicator -> IntervalPositionInfo --- -> AnalysisNwbfile multiunit_hse_times_object_id: varchar(40) - """ + " def make(self, key): marks = (UnitMarksIndicator & key).fetch_xarray() @@ -712,6 +716,7 @@ def make(self, key): ) self.insert1(key) +""" def get_decoding_data_for_epoch( diff --git a/src/spyglass/figurl_views/SpikeSortingView.py b/src/spyglass/figurl_views/SpikeSortingView.py index 7f460d767..c738ad884 100644 --- a/src/spyglass/figurl_views/SpikeSortingView.py +++ b/src/spyglass/figurl_views/SpikeSortingView.py @@ -5,7 +5,7 @@ SpikeSortingView as SortingViewSpikeSortingView, ) -from ..common.common_spikesorting import SpikeSorting, SpikeSortingRecording +from ..spikesorting import SpikeSorting, SpikeSortingRecording from .prepare_spikesortingview_data import prepare_spikesortingview_data schema = dj.schema("figurl_view_spike_sorting_recording") diff --git a/src/spyglass/lfp/v1/lfp_artifact.py b/src/spyglass/lfp/v1/lfp_artifact.py index 3c6389af4..e5124eab2 100644 --- a/src/spyglass/lfp/v1/lfp_artifact.py +++ b/src/spyglass/lfp/v1/lfp_artifact.py @@ -1,13 +1,13 @@ import datajoint as dj +import numpy as np +from spyglass.common import get_electrode_indices from spyglass.common.common_interval import IntervalList from spyglass.lfp.v1.lfp import LFPV1 from spyglass.lfp.v1.lfp_artifact_difference_detection import ( difference_artifact_detector, ) from spyglass.lfp.v1.lfp_artifact_MAD_detection import mad_artifact_detector -import numpy as np -from spyglass.common import get_electrode_indices schema = dj.schema("lfp_v1") @@ -182,7 +182,6 @@ def make(self, key): key["target_interval_list_name"], "LFP", key["artifact_params_name"], - "artifact_removed_valid_times", ] ), ) @@ -204,9 +203,12 @@ class LFPArtifactRemovedIntervalList(dj.Manual): definition = """ # Stores intervals without detected artifacts. Entries can come from either # ArtifactDetection() or alternative artifact removal analyses. - artifact_removed_interval_list_name: varchar(200) + artifact_removed_interval_list_name: varchar(128) --- -> LFPArtifactDetectionSelection artifact_removed_valid_times: longblob artifact_times: longblob # np.array of artifact intervals """ + + # See #630, #664. Excessive key length. + # 200 enties in database over 128. If string removed as above, all fit. diff --git a/src/spyglass/lock/file_lock.py b/src/spyglass/lock/file_lock.py index 08fd278a3..079a4723d 100644 --- a/src/spyglass/lock/file_lock.py +++ b/src/spyglass/lock/file_lock.py @@ -4,6 +4,8 @@ schema = dj.schema("file_lock") +from ..common.common_nwbfile import AnalysisNwbfile, Nwbfile + @schema class NwbfileLock(dj.Manual): diff --git a/src/spyglass/position/position_merge.py b/src/spyglass/position/position_merge.py index b20a3c0d9..86de6194f 100644 --- a/src/spyglass/position/position_merge.py +++ b/src/spyglass/position/position_merge.py @@ -88,13 +88,15 @@ def fetch1_dataframe(self): class PositionVideoSelection(dj.Manual): definition = """ nwb_file_name : varchar(255) # name of the NWB file - interval_list_name : varchar(200) # descriptive name of this interval list + interval_list_name : varchar(170) # descriptive name of this interval list plot_id : int plot : varchar(40) # Which position info to overlay on video file --- output_dir : varchar(255) # directory where to save output video """ + # NOTE: See #630, #664. Excessive key length. + def insert1(self, key, **kwargs): key["plot_id"] = self.get_plotid(key) super().insert1(key, **kwargs) diff --git a/src/spyglass/position_linearization/v1/__init__.py b/src/spyglass/position_linearization/v1/__init__.py index ec3dffc65..46d287a45 100644 --- a/src/spyglass/position_linearization/v1/__init__.py +++ b/src/spyglass/position_linearization/v1/__init__.py @@ -1,7 +1,7 @@ +from spyglass.common.common_position import NodePicker from spyglass.position_linearization.v1.linearization import ( LinearizationParameters, LinearizationSelection, LinearizedPositionV1, - NodePicker, TrackGraph, ) diff --git a/src/spyglass/spikesorting/spikesorting_artifact.py b/src/spyglass/spikesorting/spikesorting_artifact.py index 0532357c1..9ae951f90 100644 --- a/src/spyglass/spikesorting/spikesorting_artifact.py +++ b/src/spyglass/spikesorting/spikesorting_artifact.py @@ -140,13 +140,15 @@ class ArtifactRemovedIntervalList(dj.Manual): definition = """ # Stores intervals without detected artifacts. # Note that entries can come from either ArtifactDetection() or alternative artifact removal analyses. - artifact_removed_interval_list_name: varchar(200) + artifact_removed_interval_list_name: varchar(170) --- -> ArtifactDetectionSelection artifact_removed_valid_times: longblob artifact_times: longblob # np array of artifact intervals """ + # See #630, #664. Excessive key length. + def _get_artifact_times( recording: si.BaseRecording, diff --git a/src/spyglass/spikesorting/spikesorting_curation.py b/src/spyglass/spikesorting/spikesorting_curation.py index b0699ba42..5b6703271 100644 --- a/src/spyglass/spikesorting/spikesorting_curation.py +++ b/src/spyglass/spikesorting/spikesorting_curation.py @@ -385,10 +385,13 @@ def _get_waveform_extractor_name(self, key): class MetricParameters(dj.Manual): definition = """ # Parameters for computing quality metrics of sorted units - metric_params_name: varchar(200) + metric_params_name: varchar(64) --- metric_params: blob """ + + # NOTE: See #630, #664. Excessive key length. + metric_default_params = { "snr": { "peak_sign": "neg", @@ -645,12 +648,14 @@ def _get_num_spikes( @schema class AutomaticCurationParameters(dj.Manual): definition = """ - auto_curation_params_name: varchar(200) # name of this parameter set + auto_curation_params_name: varchar(36) # name of this parameter set --- merge_params: blob # dictionary of params to merge units label_params: blob # dictionary params to label units """ + # NOTE: No existing entries impacted by this change + def insert1(self, key, **kwargs): # validate the labels and then insert # TODO: add validation for merge_params diff --git a/src/spyglass/spikesorting/spikesorting_recording.py b/src/spyglass/spikesorting/spikesorting_recording.py index cdad793e9..f6abb72b5 100644 --- a/src/spyglass/spikesorting/spikesorting_recording.py +++ b/src/spyglass/spikesorting/spikesorting_recording.py @@ -20,8 +20,8 @@ from ..common.common_lab import LabTeam # noqa: F401 from ..common.common_nwbfile import Nwbfile from ..common.common_session import Session # noqa: F401 -from ..utils.dj_helper_fn import dj_replace from ..settings import recording_dir +from ..utils.dj_helper_fn import dj_replace schema = dj.schema("spikesorting_recording") @@ -321,19 +321,23 @@ def get_geometry(self, sort_group_id, nwb_file_name): class SortInterval(dj.Manual): definition = """ -> Session - sort_interval_name: varchar(200) # name for this interval + sort_interval_name: varchar(64) # name for this interval --- sort_interval: longblob # 1D numpy array with start and end time for a single interval to be used for spike sorting """ + # NOTE: See #630, #664. Excessive key length. + @schema class SpikeSortingPreprocessingParameters(dj.Manual): definition = """ - preproc_params_name: varchar(200) + preproc_params_name: varchar(32) --- preproc_params: blob """ + # NOTE: Reduced key less than 2 existing entries + # All existing entries are below 48 def insert_default(self): # set up the default filter parameters diff --git a/src/spyglass/spikesorting/spikesorting_sorting.py b/src/spyglass/spikesorting/spikesorting_sorting.py index 8178ef513..955131db8 100644 --- a/src/spyglass/spikesorting/spikesorting_sorting.py +++ b/src/spyglass/spikesorting/spikesorting_sorting.py @@ -14,7 +14,7 @@ from ..common.common_lab import LabMember, LabTeam from ..common.common_nwbfile import AnalysisNwbfile -from ..settings import temp_dir, sorting_dir +from ..settings import sorting_dir, temp_dir from .spikesorting_artifact import ArtifactRemovedIntervalList from .spikesorting_recording import ( SpikeSortingRecording, @@ -27,12 +27,14 @@ @schema class SpikeSorterParameters(dj.Manual): definition = """ - sorter: varchar(200) - sorter_params_name: varchar(200) + sorter: varchar(32) + sorter_params_name: varchar(64) --- sorter_params: blob """ + # NOTE: See #630, #664. Excessive key length. + def insert_default(self): """Default params from spike sorters available via spikeinterface""" sorters = sis.available_sorters() @@ -236,10 +238,11 @@ def make(self, key: dict): self.insert1(key) def delete(self): - """Extends the delete method of base class to implement permission checking. - Note that this is NOT a security feature, as anyone that has access to source code - can disable it; it just makes it less likely to accidentally delete entries. - """ + """Extends the delete method of base class to implement permission + checking. Note that this is NOT a security feature, as anyone that has + access to source code can disable it; it just makes it less likely to + accidentally delete entries.""" + current_user_name = dj.config["database.user"] entries = self.fetch() permission_bool = np.zeros((len(entries),)) diff --git a/src/spyglass/utils/dj_merge_tables.py b/src/spyglass/utils/dj_merge_tables.py index b03af092e..2e184c66a 100644 --- a/src/spyglass/utils/dj_merge_tables.py +++ b/src/spyglass/utils/dj_merge_tables.py @@ -727,7 +727,10 @@ def delete_downstream_merge( def _unique_descendants( - table: dj.Table, recurse_level: int, return_names: bool = False + table: dj.Table, + recurse_level: int = 2, + return_names: bool = False, + attribute=None, ) -> list: """Recurisively find unique descendants of a given table @@ -739,6 +742,8 @@ def _unique_descendants( The maximum level of descendants to find. return_names: bool If True, return names of descendants found. Else return Table objects. + attribute: str, optional + If provided, only return descendants that have this attribute. Returns ------- @@ -749,11 +754,18 @@ def _unique_descendants( if recurse_level == 0: return [] + if attribute is None: + skip_attr_check = True + else: + skip_attr_check = False + descendants = {} def recurse_descendants(sub_table, level): for descendant in sub_table.descendants(as_objects=True): - if descendant.full_table_name not in descendants: + if descendant.full_table_name not in descendants and ( + skip_attr_check or attribute in descendant.heading.attributes + ): descendants[descendant.full_table_name] = descendant if level > 1: recurse_descendants(descendant, level - 1)