diff --git a/.github/workflows/publish-docs.yml b/.github/workflows/publish-docs.yml index db1cf6224..28c566665 100644 --- a/.github/workflows/publish-docs.yml +++ b/.github/workflows/publish-docs.yml @@ -2,7 +2,7 @@ name: Publish docs on: push: tags: # See PEP 440 for valid version format - - "*.*.*" # For docs bump, use X.X.XaX + - "*.*.*" # For docs bump, use workflow_dispatch branches: - test_branch workflow_dispatch: # Manually trigger with 'Run workflow' button @@ -23,13 +23,17 @@ jobs: issues: write steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: token: ${{ secrets.GITHUB_TOKEN }} fetch-depth: 0 + - name: Git describe # Get tags + id: ghd # see Deploy below. Will fail if no tags on branch + uses: proudust/gh-describe@v2 + - name: Set up Python runtime - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: 3.9 token: ${{ secrets.GITHUB_TOKEN }} @@ -40,8 +44,8 @@ jobs: git config user.name 'github-actions[bot]' && git config user.email 'github-actions[bot]@users.noreply.github.com' - name: Deploy - run: | - FULL_VERSION=${{ github.ref_name }} + run: | # github.ref_name is branch name if dispatch + FULL_VERSION=${{ steps.ghd.outputs.tag }} export MAJOR_VERSION=${FULL_VERSION:0:3} echo "OWNER: ${REPO_OWNER}. 
BUILD: ${MAJOR_VERSION}" bash ./docs/build-docs.sh push $REPO_OWNER diff --git a/.github/workflows/test-package-build.yml b/.github/workflows/test-package-build.yml index c93b77398..3513bb664 100644 --- a/.github/workflows/test-package-build.yml +++ b/.github/workflows/test-package-build.yml @@ -64,7 +64,7 @@ jobs: with: name: archive path: archive/ - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 with: python-version: 3.9 - name: Display Python version diff --git a/.markdownlint.yaml b/.markdownlint.yaml index 5c9c3712b..f57fbf732 100644 --- a/.markdownlint.yaml +++ b/.markdownlint.yaml @@ -1,8 +1,10 @@ # https://github.com/DavidAnson/markdownlint # https://github.com/DavidAnson/markdownlint/blob/main/doc/Rules.md -MD007: false # permit indenting 4 spaces instead of 2 +MD007: # permit indenting 4 spaces instead of 2 + indent: 4 + start_indent: 4 MD013: - line_length: "80" # Line length limits + line_length: 80 # Line length limits tables: false # disable for tables code_blocks: false # disable for code blocks MD025: false # permit adjacent headings diff --git a/CHANGELOG.md b/CHANGELOG.md index 449ccd8de..ba21ee169 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,7 @@ ## [0.5.3] (Unreleased) -### Release Notes +## Release Notes @@ -36,10 +36,14 @@ PositionGroup.alter() - Ensure integrity of group tables #1026 - Convert list of LFP artifact removed interval list to array #1046 - Merge duplicate functions in decoding and spikesorting #1050 +- Revise docs organization. + - Misc -> Features/ForDevelopers. #1029 + - Installation instructions -> Setup notebook. 
#1029 ### Pipelines - Common + - `PositionVideo` table now inserts into self after `make` #966 - Don't insert lab member when creating lab team #983 - Files created by `AnalysisNwbfile.create()` receive new object_id #999 @@ -51,10 +55,14 @@ PositionGroup.alter() - `PositionIntervalMap` now inserts null entries for missing intervals #870 - `AnalysisFileLog` now truncates table names that exceed field length #1021 - Disable logging with `AnalysisFileLog` #1024 + - Decoding: + - Default values for classes on `ImportError` #966 - Add option to upsample data rate in `PositionGroup` #1008 + - Position + - Allow dlc without pre-existing tracking data #973, #975 - Raise `KeyError` for missing input parameters across helper funcs #966 - `DLCPosVideo` table now inserts into self after `make` #966 @@ -67,7 +75,9 @@ PositionGroup.alter() `get_video_info` to reflect actual use #870 - Fix `red_led_bisector` `np.nan` handling issue from #870. Fixed in #1034 - Fix `one_pt_centoid` `np.nan` handling issue from #870. Fixed in #1034 + - Spikesorting + - Allow user to set smoothing timescale in `SortedSpikesGroup.get_firing_rate` #994 - Update docstrings #996 diff --git a/CITATION.cff b/CITATION.cff index 6fc0e83aa..ed9dd3cc5 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -2,7 +2,7 @@ # Visit https://bit.ly/cffinit to generate yours today! cff-version: 1.2.0 -title: spyglass +title: 'Spyglass: a data analysis framework for reproducible and shareable neuroscience research' message: 'If you use this software, please cite it as below.' 
type: software authors: @@ -84,18 +84,18 @@ authors: email: emrey.broyles@ucsf.edu affiliation: 'University of California, San Francisco' orcid: 'https://orcid.org/0000-0001-5559-2910' - - given-names: Shin - family-names: Donghoon + - given-names: Donghoon + family-names: Shin email: donghoon.shin@ucsf.edu affiliation: 'University of California, San Francisco' orcid: 'https://orcid.org/0009-0000-8916-7314' - - given-names: Chiang - family-names: Sharon + - given-names: Sharon + family-names: Chiang email: sharon.chiang@ucsf.edu affiliation: 'University of California, San Francisco' orcid: 'https://orcid.org/0000-0002-4548-4550' - - given-names: Holobetz - family-names: Cristofer + - given-names: Cristofer + family-names: Holobetz email: cristofer.holobetz.23@ucl.ac.uk affiliation: 'University College London' orcid: 'https://orcid.org/0009-0009-8567-3290' diff --git a/README.md b/README.md index 42c0e0357..d2e4e5e47 100644 --- a/README.md +++ b/README.md @@ -5,14 +5,20 @@ ![Spyglass Figure](docs/src/images/fig1.png) -[Demo](https://spyglass.hhmi.2i2c.cloud/hub/user-redirect/git-pull?repo=https%3A%2F%2Fgithub.com%2FLorenFrankLab%2Fspyglass-demo&urlpath=lab%2Ftree%2Fspyglass-demo%2Fnotebooks%2F01_Insert_Data.ipynb&branch=main) | [Installation](https://lorenfranklab.github.io/spyglass/latest/installation/) | [Docs](https://lorenfranklab.github.io/spyglass/) | [Tutorials](https://github.com/LorenFrankLab/spyglass/tree/master/notebooks) | [Citation](#citation) +[Demo](https://spyglass.hhmi.2i2c.cloud/hub/user-redirect/git-pull?repo=https%3A%2F%2Fgithub.com%2FLorenFrankLab%2Fspyglass-demo&urlpath=lab%2Ftree%2Fspyglass-demo%2Fnotebooks%2F02_Insert_Data.ipynb&branch=main) +| +[Installation](https://lorenfranklab.github.io/spyglass/latest/notebooks/00_Setup/) +| [Docs](https://lorenfranklab.github.io/spyglass/) | +[Tutorials](https://github.com/LorenFrankLab/spyglass/tree/master/notebooks) | +[Citation](#citation) `spyglass` is a data analysis framework that facilitates the 
storage, analysis, visualization, and sharing of neuroscience data to support reproducible research. It is designed to be interoperable with the NWB format and integrates open-source tools into a coherent framework. -Try out a demo [here](https://spyglass.hhmi.2i2c.cloud/hub/user-redirect/git-pull?repo=https%3A%2F%2Fgithub.com%2FLorenFrankLab%2Fspyglass-demo&urlpath=lab%2Ftree%2Fspyglass-demo%2Fnotebooks%2F01_Insert_Data.ipynb&branch=main)! +Try out a demo +[here](https://spyglass.hhmi.2i2c.cloud/hub/user-redirect/git-pull?repo=https%3A%2F%2Fgithub.com%2FLorenFrankLab%2Fspyglass-demo&urlpath=lab%2Ftree%2Fspyglass-demo%2Fnotebooks%2F02_Insert_Data.ipynb&branch=main)! Features of Spyglass include: @@ -60,16 +66,16 @@ Documentation can be found at - ## Installation For installation instructions see - -[https://lorenfranklab.github.io/spyglass/latest/installation/](https://lorenfranklab.github.io/spyglass/latest/installation/) +[https://lorenfranklab.github.io/spyglass/latest/notebooks/00_Setup/](https://lorenfranklab.github.io/spyglass/latest/notebooks/00_Setup/) Typical installation time is: 5-10 minutes ## Tutorials -The tutorials for `spyglass` is currently in the form of Jupyter Notebooks and +The tutorials for `spyglass` are currently in the form of Jupyter Notebooks and can be found in the [notebooks](https://github.com/LorenFrankLab/spyglass/tree/master/notebooks) -directory. We strongly recommend opening them in the context of `jupyterlab`. +directory. We strongly recommend running the notebooks yourself. ## Contributing @@ -85,17 +91,31 @@ License and Copyright notice can be found at ## System requirements -Spyglass has been tested on Linux Ubuntu 20.04 and MacOS 10.15. It has not been tested on Windows and likely will not work. +Spyglass has been tested on Linux Ubuntu 20.04 and MacOS 10.15. It has not been +tested on Windows and likely will not work. -No specific hardware requirements are needed to run spyglass. 
However, the amount of data that can be stored and analyzed is limited by the available disk space and memory. GPUs are required for some of the analysis tools, such as DeepLabCut. +No specific hardware requirements are needed to run spyglass. However, the +amount of data that can be stored and analyzed is limited by the available disk +space and memory. GPUs are required for some of the analysis tools, such as +DeepLabCut. -See [pyproject.toml](pyproject.toml), [environment.yml](environment.yml), or [environment_dlc.yml](environment_dlc.yml) for software dependencies. +See [pyproject.toml](pyproject.toml), [environment.yml](environment.yml), or +[environment_dlc.yml](environment_dlc.yml) for software dependencies. -See [spec-file.txt](https://github.com/LorenFrankLab/spyglass-demo/blob/main/spec-file/spec-file.txt) for the conda environment used in the demo. +See +[spec-file.txt](https://github.com/LorenFrankLab/spyglass-demo/blob/main/spec-file/spec-file.txt) +for the conda environment used in the demo. ## Citation -> Lee, K.H.\*, Denovellis, E.L.\*, Ly, R., Magland, J., Soules, J., Comrie, A.E., Gramling, D.P., Guidera, J.A., Nevers, R., Adenekan, P., Brozdowski, C., Bray, S., Monroe, E., Bak, J.H., Coulter, M.E., Sun, X., Broyles, E., Shin, D., Chiang, S., Holobetz, C., Tritt, A., Rübel, O., Nguyen, T., Yatsenko, D., Chu, J., Kemere, C., Garcia, S., Buccino, A., Frank, L.M., 2024. Spyglass: a data analysis framework for reproducible and shareable neuroscience research. bioRxiv. [10.1101/2024.01.25.577295](https://doi.org/10.1101/2024.01.25.577295). +> Lee, K.H.\*, Denovellis, E.L.\*, Ly, R., Magland, J., Soules, J., Comrie, +> A.E., Gramling, D.P., Guidera, J.A., Nevers, R., Adenekan, P., Brozdowski, C., +> Bray, S., Monroe, E., Bak, J.H., Coulter, M.E., Sun, X., Broyles, E., Shin, +> D., Chiang, S., Holobetz, C., Tritt, A., Rübel, O., Nguyen, T., Yatsenko, D., +> Chu, J., Kemere, C., Garcia, S., Buccino, A., Frank, L.M., 2024. 
Spyglass: a +> data analysis framework for reproducible and shareable neuroscience research. +> bioRxiv. +> [10.1101/2024.01.25.577295](https://doi.org/10.1101/2024.01.25.577295). *\* Equal contribution* diff --git a/docs/README.md b/docs/README.md index 8eee3f1a4..0ae399532 100644 --- a/docs/README.md +++ b/docs/README.md @@ -16,11 +16,9 @@ The remainder of `mkdocs.yml` specifies the site's ## GitHub Whenever a new tag is pushed, GitHub actions will run -`.github/workflows/publish-docs.yml`. Progress can be monitored in the 'Actions' -tab within the repo. - -Releases should be tagged with `X.Y.Z`. A tag to redeploy docs should use the -current version, with an alpha release suffix, e.g. `X.Y.Za1`. +`.github/workflows/publish-docs.yml`. From the repository, select the Actions +tab, and then the 'Publish Docs' workflow on the left to monitor progress. The +process can also be manually triggered by selecting 'Run workflow' on the right. To deploy on your own fork without a tag, follow turn on github pages in settings, following a `documentation` branch, and then push to `test_branch`. @@ -47,7 +45,7 @@ the root notebooks directory may not be reflected when rebuilding. Use a browser to navigate to `localhost:8000/` to inspect the site. For auto-reload of markdown files during development, use `mkdocs serve -f ./docs/mkdosc.yaml`. The `mike` package used in the build -script manages versioning, but does not support dynamic versioning. +script manages versioning, but does not support dynamic reloading. 
The following items can be commented out in `mkdocs.yml` to reduce build time: diff --git a/docs/build-docs.sh b/docs/build-docs.sh index 50d44f511..44b383853 100755 --- a/docs/build-docs.sh +++ b/docs/build-docs.sh @@ -14,12 +14,25 @@ mv ./docs/src/notebooks/README.md ./docs/src/notebooks/index.md cp -r ./notebook-images ./docs/src/notebooks/ cp -r ./notebook-images ./docs/src/ -if [ -z "$MAJOR_VERSION" ]; then # Get version from file - version_line=$(grep "__version__ =" ./src/spyglass/_version.py) - version_string=$(echo "$version_line" | awk -F"[\"']" '{print $2}') +# Return 0 if $1 matches the major version format #.# (e.g., 0.5), else 1 +check_format() { + local version="$1" + if [[ $version =~ ^[0-9]+\.[0-9]+$ ]]; then + return 0 + else + return 1 + fi +} + +# If MAJOR_VERSION is unset or not in #.# format, derive it from the latest tag +if [ -z "$MAJOR_VERSION" ] || ! check_format "$MAJOR_VERSION"; then + full_version=$(git describe --tags --abbrev=0) - export MAJOR_VERSION="${version_string:0:3}" + export MAJOR_VERSION="${full_version:0:3}" fi -echo "$MAJOR_VERSION" # May be available as env var +if ! 
check_format "$MAJOR_VERSION"; then + export MAJOR_VERSION="dev" # Fallback to dev if still not valid +fi +echo "$MAJOR_VERSION" # Get ahead of errors export JUPYTER_PLATFORM_DIRS=1 diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index 88734f3a0..30d2bd79d 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -42,15 +42,14 @@ theme: nav: - Home: index.md - - Installation: installation.md - Tutorials: - Overview: notebooks/index.md - Intro: - Setup: notebooks/00_Setup.ipynb - - Insert Data: notebooks/01_Insert_Data.ipynb - - Data Sync: notebooks/02_Data_Sync.ipynb - - Merge Tables: notebooks/03_Merge_Tables.ipynb - - Config Populate: notebooks/04_PopulateConfigFile.ipynb + - Concepts: notebooks/01_Concepts.ipynb + - Insert Data: notebooks/02_Insert_Data.ipynb + - Data Sync: notebooks/03_Data_Sync.ipynb + - Merge Tables: notebooks/04_Merge_Tables.ipynb - Export: notebooks/05_Export.ipynb - Spikes: - Spike Sorting V0: notebooks/10_Spike_SortingV0.ipynb @@ -70,18 +69,22 @@ nav: - Decoding Clusterless: notebooks/41_Decoding_Clusterless.ipynb - Decoding Sorted Spikes: notebooks/42_Decoding_SortedSpikes.ipynb - MUA Detection: notebooks/50_MUA_Detection.ipynb - - Miscellaneous: - - Overview: misc/index.md - - Common Errors: misc/common_errs.md - - Database Management: misc/database_management.md - - Export: misc/export.md - - FigURL: misc/figurl_views.md - - Insert Data: misc/insert_data.md - - Merge Tables: misc/merge_tables.md - - Mixin: misc/mixin.md - - Session Groups: misc/session_groups.md + - Features: + - Overview: Features/index.md + - FigURL: Features/FigURL.md + - Merge Tables: Features/Merge.md + - Export: Features/Export.md + - Session Groups: Features/SessionGroups.md + - Centralized Code: Features/Mixin.md + - For Developers: + - Overview: ForDevelopers/index.md + - How to Contribute: ForDevelopers/Contribute.md + - Database Management: ForDevelopers/Management.md + - Code Reuse: ForDevelopers/Reuse.md + - Table Types: ForDevelopers/TableTypes.md + - 
Understanding a Schema: ForDevelopers/Schema.md + - Using NWB: ForDevelopers/UsingNWB.md - API Reference: api/ # defer to gen-files + literate-nav - - How to Contribute: contribute.md - Change Log: CHANGELOG.md - Copyright: LICENSE.md diff --git a/docs/src/misc/export.md b/docs/src/Features/Export.md similarity index 100% rename from docs/src/misc/export.md rename to docs/src/Features/Export.md diff --git a/docs/src/misc/figurl_views.md b/docs/src/Features/FigURL.md similarity index 100% rename from docs/src/misc/figurl_views.md rename to docs/src/Features/FigURL.md diff --git a/docs/src/misc/merge_tables.md b/docs/src/Features/Merge.md similarity index 100% rename from docs/src/misc/merge_tables.md rename to docs/src/Features/Merge.md diff --git a/docs/src/misc/mixin.md b/docs/src/Features/Mixin.md similarity index 91% rename from docs/src/misc/mixin.md rename to docs/src/Features/Mixin.md index 23135d3c4..ac227a7be 100644 --- a/docs/src/misc/mixin.md +++ b/docs/src/Features/Mixin.md @@ -6,7 +6,7 @@ functionalities that have been added to DataJoint tables. This includes... - Fetching NWB files - Long-distance restrictions. - Delete functionality, including permission checks and part/master pairs -- Export logging. See [export doc](export.md) for more information. +- Export logging. See [export doc](./Export.md) for more information. To add this functionality to your own tables, simply inherit from the mixin: @@ -53,8 +53,8 @@ to a `_nwb_table` attribute. In complicated pipelines like Spyglass, there are often tables that 'bury' their foreign keys as secondary keys. This is done to avoid having to pass a long list of foreign keys through the pipeline, potentially hitting SQL limits (see also -[Merge Tables](./merge_tables.md)). This burrying makes it difficult to restrict -a given table by familiar attributes. +[Merge Tables](./Merge.md)). This burying makes it difficult to restrict a +given table by familiar attributes. 
Spyglass provides a function, `restrict_by`, to handle this. The function takes your restriction and checks parents/children until the restriction can be @@ -122,7 +122,7 @@ If the user shares a lab team with the session experimenter, the deletion is permitted. This is not secure system and is not a replacement for database backups (see -[database management](./database_management.md)). A user could readily +[database management](../ForDevelopers/Management.md)). A user could readily curcumvent the default permission checks by adding themselves to the relevant team or removing the mixin from the class declaration. However, it provides a reasonable level of security for the average user. @@ -134,11 +134,11 @@ entry without deleting the corresponding master. This is useful for enforcing the custom of adding/removing all parts of a master at once and avoids orphaned masters, or null entry masters without matching data. -For [Merge tables](./merge_tables.md), this is a significant problem. If a user -wants to delete all entries associated with a given session, she must find all -part table entries, including Merge tables, and delete them in the correct -order. The mixin provides a function, `delete_downstream_parts`, to handle this, -which is run by default when calling `delete`. +For [Merge tables](./Merge.md), this is a significant problem. If a user wants +to delete all entries associated with a given session, she must find all part +table entries, including Merge tables, and delete them in the correct order. The +mixin provides a function, `delete_downstream_parts`, to handle this, which is +run by default when calling `delete`. `delete_downstream_parts`, also aliased as `ddp`, identifies all part tables with foreign key references downstream of where it is called. 
If `dry_run=True`, diff --git a/docs/src/misc/session_groups.md b/docs/src/Features/SessionGroups.md similarity index 100% rename from docs/src/misc/session_groups.md rename to docs/src/Features/SessionGroups.md diff --git a/docs/src/Features/index.md b/docs/src/Features/index.md new file mode 100644 index 000000000..e8399f84a --- /dev/null +++ b/docs/src/Features/index.md @@ -0,0 +1,12 @@ +# Features + +This directory contains a series of explainers on tools that have been added to +Spyglass. + +- [Export](./Export.md) - How to export an analysis. +- [FigURL](./FigURL.md) - How to use FigURL to share figures. +- [Merge Tables](./Merge.md) - Tables for pipeline versioning. +- [Mixin](./Mixin.md) - Spyglass-specific functionalities to DataJoint tables, + including fetching NWB files, long-distance restrictions, and permission + checks on delete operations. +- [Session Groups](./SessionGroups.md) - How to operate on sets of sessions. diff --git a/docs/src/ForDevelopers/Contribute.md b/docs/src/ForDevelopers/Contribute.md new file mode 100644 index 000000000..6a58ea792 --- /dev/null +++ b/docs/src/ForDevelopers/Contribute.md @@ -0,0 +1,57 @@ +# Contributing to Spyglass + +This document provides an overview of the Spyglass development, and provides +guidance for folks looking to contribute to the project itself. For information +on setting up custom tables, skip to Code Organization. + +## Development workflow + +New contributors should follow the +[Fork-and-Branch workflow](https://www.atlassian.com/git/tutorials/comparing-workflows/forking-workflow). +See GitHub instructions +[here](https://docs.github.com/en/get-started/quickstart/contributing-to-projects). + +Regular contributors may choose to follow the +[Feature Branch Workflow](https://www.atlassian.com/git/tutorials/comparing-workflows/feature-branch-workflow) +for features that will involve multiple contributors. 
+ +## Code organization + +- Tables are grouped into schemas by topic (e.g., `common_metrics`) +- Schemas + - Are defined in a `py` file. + - Correspond to MySQL 'databases'. + - Are organized into modules (e.g., `common`) by folders. +- The _common_ module + - In principle, contains schema that are shared across all projects. + - In practice, contains shared tables (e.g., Session) and the first draft of + schemas that have since been split into their own + modality-specific\ + modules (e.g., `lfp`) + - Should not be added to without discussion. +- A pipeline + - Refers to a set of tables used for processing data of a particular modality + (e.g., LFP, spike sorting, position tracking). + - May span multiple schema. +- For analysis that will be only useful to you, create your own schema. + +## Misc + +- During development, we suggest using a Docker container. See + [example](../notebooks/00_Setup.ipynb). +- `numpy` style docstrings will be interpreted by API docs. To check for + compliance, monitor the output when building docs (see `docs/README.md`) + +## Making a release + +Spyglass follows [Semantic Versioning](https://semver.org/) with versioning of +the form `X.Y.Z` (e.g., `0.4.2`). + +1. In `CITATION.cff`, update the `version` key. +2. Make a pull request with changes. +3. After the pull request is merged, pull this merge commit and tag it with + `git tag {version}` +4. Publish the new release tag. Run `git push origin {version}`. This will + rebuild docs and push updates to PyPI. +5. Make a new + [release on GitHub](https://docs.github.com/en/repositories/releasing-projects-on-github/managing-releases-in-a-repository). 
diff --git a/docs/src/misc/database_management.md b/docs/src/ForDevelopers/Management.md similarity index 100% rename from docs/src/misc/database_management.md rename to docs/src/ForDevelopers/Management.md diff --git a/docs/src/ForDevelopers/Reuse.md b/docs/src/ForDevelopers/Reuse.md new file mode 100644 index 000000000..baf969ecc --- /dev/null +++ b/docs/src/ForDevelopers/Reuse.md @@ -0,0 +1,359 @@ +# Coding for Reuse + + + +*Reusing code requires that it be faster to read and change than it would be to +start from scratch.* + +We can speed up that process by ... + +1. Making reading predictable. +2. Atomizing - separating pieces into the smallest meaningful chunks. +3. Leaving notes via type hints, docstrings, and comments +4. Getting ahead of errors +5. Automating as much of the above as possible. + +This document pulls from resources like +[Tidy First](https://www.oreilly.com/library/view/tidy-first/9781098151232/) and +[SOLID Principles](https://arjancodes.com/blog/solid-principles-in-python-programming/). +Experienced object-oriented developers may find these principles familiar. + +## Predictable Formatting + +- Many programming languages offer flexibility in how they are written. +- Tools like `black` and `isort` take away stylistic preferences in favor of one + norm. +- Strict line limits (e.g., 80) make it easier to do side by side comparisons in + git interfaces. +- `black` is also useful for detecting an error on save - if it doesn't run on + what you wrote, there's an error somewhere. + +Let's look at a few examples of the same code block formatted different ways... 
+ +### Original + +```python +def get_data_interface(nwbfile, data_interface_name, data_interface_class=None, unused_other_arg=None): + ret = { 'centroid_method': "two_pt_centroid", 'points': {'point1': 'greenLED', "point2": 'redLED_C'}, 'interpolate': True} + for module in nwbfile.processing.values(): + match = module.data_interfaces.get(data_interface_name, None) + if match is not None: + if data_interface_class is not None and not isinstance(match, data_interface_class): + continue + ret.append(match) + if len(ret) > 1: + print(f"Multiple data interfaces with name '{data_interface_name}' found with identifier {nwbfile.identifier}.") + if len(ret) >= 1: + return ret[0] + return None +``` + +### Black formatted + +With `black`, we have a limited line length and indents reflect embedding. + +```python +def get_data_interface( # Each arg gets its own line + nwbfile, + data_interface_name, + data_interface_class=None, + unused_other_arg=None, +): + ret = { # dictionaries show embedding + "centroid_method": "two_pt_centroid", + "points": { + "point1": "greenLED", + "point2": "redLED_C", + }, + "interpolate": True, + } + for module in nwbfile.processing.values(): + match = module.data_interfaces.get(data_interface_name, None) + if match is not None: + if data_interface_class is not None and not isinstance( + match, data_interface_class + ): # long lines broken up + continue + ret.append(match) + if len(ret) > 1: + print( # long strings need to be broken up manually + f"Multiple data interfaces with name '{data_interface_name}' " + f"found in NWBFile with identifier {nwbfile.identifier}. " + ) + if len(ret) >= 1: + return ret[0] + return None +``` + +### Control flow adjustments + +Although subjective, we can do even better by adjusting the logic to follow how +we read. 
+ +```python +from typing import Type +def get_data_interface(...): + ret = {...} + # decide no input early + data_interface_class = data_interface_class or Type + for match in [ # generate via list comprehension + module.get_data_interface(data_interface_name) + for module in nwbfile.processing.values() + ]: # only process good case, no `continue` + if match and isinstance(match, data_interface_class): + ret.append(match) + if len(ret) > 1: + print(...) + return ret[0] if len(ret) >= 1 else None # fits on one line +``` + +## Atomizing + +Working memory limits our ability to understand long code blocks. + +We can extract pieces into separate places to give them a name and make 'one' +memory chunk out of a set of functions. + +Depending on the scope, chunks can be separated with ... + +1. Paragraph breaks - to group instructions together. +2. Conditional assignment - for data maps local to a function. +3. Methods of a class - for functions that deserve a separate name. +4. Helpers in a script - for functions used multiple times in a schema. +5. Util scripts in a package - for functions used throughout a project. + +### Atomizing example + +- Let's read the next function as if we're revisiting old code. +- This example was taken from an existing project and adjusted for + demonstration. +- Please review without commentary and make mental notes of what each line + is doing and how they relate to other lines. + +
No commentary + +```python +class MyTable(dj.Computed): + ... + + def make(self, key): + rat_name = key["rat_name"] + ron_all_dict = {"some_data": 1} + tonks_all_dict = {"other_data": 2} + try: + if len((OtherTable & key).fetch("cluster_id")[0]) > 0: + if rat_name == "ron": + data_dict = ron_all_dict + elif rat_name == "tonks": + data_dict = tonks_all_dict + else: + raise ValueError(f"Unsupported rat {rat_name}") + for data_key, data_value in data_dict.items(): + try: + if data_value == 1: + cluster_spike_times = (OtherTable & key).fetch_nwb()[ + 0 + ]["units"]["spike_times"] + else: + cluster_spike_times = (OtherTable & key).fetch_nwb()[ + data_value - 1 + ]["units"]["spike_times"][data_key] + self.insert1(cluster_spike_times) + except KeyError: + print("cluster missing", key["nwb_file_name"]) + else: + print("no spikes") + except IndexError: + print("no data") +``` + +
+ +
With Commentary + +Note how the numbers correspond to their counterparts - 1Q, 1A, 2Q, 2A ... + +```python +class MyTable(dj.Computed): + ... + def make(self, key): + rat_name = key["rat_name"] # 1Q. Can this function handle others? + ron_all_dict = {"some_data": 1} # 2Q. Are these parameters? + tonks_all_dict = {"other_data": 2} + try: # 3Q. What error could be thrown? And by what? + if len((OtherTable & key).fetch("cluster_id")[0]) > 0: # 4Q. What happens if none? + if rat_name == "ron": + data_dict = ron_all_dict # 2A. ok, we decide the data here + elif rat_name == "tonks": + data_dict = tonks_all_dict + else: # 1A. Ok, we can only do these two + raise ValueError(f"Unsupported rat {rat_name}") + for data_key, data_value in data_dict.items(): # 2A. Maybe parameter? + try: # 5Q. What could throw an error? + if data_value == 1: + cluster_spike_times = (OtherTable & key).fetch_nwb()[ + 0 + ]["units"]["spike_times"] # 6Q. What do we need this for? + else: + cluster_spike_times = (OtherTable & key).fetch_nwb()[ + data_value - 1 + ]["units"]["spike_times"][data_key] + self.insert1(cluster_spike_times) # 6A. Ok, insertion + except KeyError: # 5A. Maybe this fetch is unreliable? + print("cluster missing", key["nwb_file_name"]) + else: + print("no spikes") # 4A. Ok we bail if no clusters + except IndexError: # 3A. What could have thrown this? Are we sure nothing else? + print("no data") +``` + +
+ +### Embedding + +- The process of stream of consciousness coding often generates an embedding + trail from core out +- Our mental model of A -> B -> C -> D may actually read like `D( C( B( A )))` + or ... + +1. Prepare for D +2. Open a loop for C +3. Add caveat B +4. Do core process A +5. Check other condition B +6. Close D + +Let's contrast with an approach that reduces embedding. + +```python +class MyTable(dj.Computed): + ... + def _get_cluster_times(self, key, nth_file, index): # We will need times + clust = (OtherTable & key).fetch_nwb()[nth_file]["units"]["spike_times"] + try: # Looks like this indexing may not return the data + return clust[index] if nth_file == 0 else clust # if/then handled here + except KeyError: # Show as err, keep moving + logger.error("Cluster missing", key["nwb_file_name"]) + + def make(self, key): + rat_paramsets = {"ron": {"some_data": 1}, "tonks": {"other_data": 2}} # informative variable name + if (rat_name := key["rat_name"]) not in rat_paramsets: # walrus operator `:=` can assign within `if` + raise ValueError(f"Unsupported rat {rat_name}") # we can only handle a subset a rats + rat_params = rat_paramsets[rat_name] # conditional assignment + + if not len((OtherTable & key).fetch("cluster_id")[0]): # paragraph breaks separate chunks conceptually + logger.info(f"No spikes for {key}") # log level can be adjusted at run + + insertion_list = [] # We're gonna insert something + for file_index, file_n in rat_params.items(): + insertion_list.append( + self._get_cluster_times(key, file_n - 1, file_index) # there it is, clusters + ) + self.insert(insertion_list) # separate inserts to happen all at once +``` + +## Comments, Type hints and docstrings + +It's tempting to leave comments in code, but they can become outdated and +confusing. Instead try Atomizing and using Type hints and docstrings. + +Type hints are not enforced, but make it much easier to tell the design intent +when reread. 
Docstrings are similarly optional, but make it easy to get prompts +without looking at the code again via `help(myfunc)` + +### Type hints + +```python +def get_data_interface( + nwbfile: pynwb.Nwbfile, + data_interface_name: Union[str, list], # one or the other + other_arg: Dict[str, Dict[str, dj.FreeTable]] = None, # show embedding +) -> NWBDataInterface: # What it returns. `None` if no return + pass +``` + +### Docstrings + +- Spyglass uses the NumPy docstring style, as opposed to Google. +- These are rendered in the + [API documentation](https://lorenfranklab.github.io/spyglass/latest/api/utils/nwb_helper_fn/#src.spyglass.utils.nwb_helper_fn.get_data_interface) + +```python +def get_data_interface(*args, **kwargs): + """One-line description. + + Additional notes or further description in case the one line above is + not enough. + + Parameters + ---------- + nwbfile : pynwb.NWBFile + Description of the arg. e.g., The NWB file object to search in. + data_interface_name : Union[str, list] + More here. + data_interface_class : Dict[str, Dict[str, dj.FreeTable]], optional + more here + + Warns + ----- + LoggerWarning + Why warn. + + Raises + ------ + ValueError + Why it would hit this error. + + Returns + ------- + data_interface : NWBDataInterface + + Example + ------- + > data_interface = get_data_interface(mynwb, "interface_name") + """ + pass +``` + +## Error detection with linting + +- Packages like `ruff` can show you bad code 'smells' while you write and fix + some for you. +- PEP8, Flake8 and other standards will flag issues like ... + - F401: Module imported but unused + - E402: Module level import not at top of file + - E713: Test for membership should be 'not in' +- `black` will fix a subset of Flake8 issues, but not all. `ruff` identifies or + fixes these rules and [many others](https://docs.astral.sh/ruff/rules/). + +## Automation + +- `black`, `isort`, and `ruff` can be run on save in most IDEs by searching + their extensions. 
+- `pre-commit` is a tool that can be used to run these checks before each + commit, ensuring that all your code is formatted, as defined in a `yaml` + file. + +```yaml +default_stages: [commit, push] +exclude: (^.github/|^docs/site/|^images/) + +repos: + - repo: https://github.com/ambv/black + rev: 24.1.1 + hooks: + - id: black + language_version: python3.9 + + - repo: https://github.com/codespell-project/codespell + rev: v2.2.6 + hooks: + - id: codespell + args: [--toml, pyproject.toml] + additional_dependencies: + - tomli +``` diff --git a/docs/src/ForDevelopers/Schema.md b/docs/src/ForDevelopers/Schema.md new file mode 100644 index 000000000..65df014ba --- /dev/null +++ b/docs/src/ForDevelopers/Schema.md @@ -0,0 +1,483 @@ +# Schema Design + +This document gives a detailed overview of how to read a schema script, +including explanations of the different components that define a pipeline. + +1. Goals of a schema +2. Front matter + 1. Imports + 2. Schema declaration +3. Table syntax + 1. Class inheritance + 2. Explicit table types + 3. Definitions + 4. Methods +4. Conceptual table types + +Some of this will be redundant with general Python best practices and DataJoint +documentation, but it is important to be able to read a schema, especially if +you plan to write your own. + +Later sections will depend on information presented in the article on +[Table Types](./TableTypes.md). + +## Goals of a schema + +- At its core, DataJoint is just a mapping between Python and SQL. +- SQL is a language for managing relational databases. +- DataJoint is opinionated about how to structure the database, and limits SQL's + potential options in a way that promotes good practices. +- Python stores ... + - A copy of table definitions, that may be out of sync with the database. + - Methods for processing data, that may be out of sync with existing data. + +Good data provenance requires good version control and documentation to keep +these in sync.
+ +## Example schema + +This is the full example schema referenced in subsections below. + +
Full Schema + +```python +"""Schema example for custom pipelines + +Note: `noqa: F401` is a comment that tells linters to ignore the fact that +`Subject` seems unused in the file. If this table is only used in a table +definition string, the linter will not recognize it as being used. +""" + +import random # Package import +from typing import Union # Individual class import +from uuid import UUID + +import datajoint as dj # Aliased package import +from custom_package.utils import process_df, schema_prefix # custom functions +from spyglass.common import RawPosition, Subject # noqa: F401 +from spyglass.utils import SpyglassMixin # Additional Spyglass features + +schema = dj.schema(schema_prefix + "_example") # schema name from string + + +# Model to demonstrate DataJoint syntax +@schema # Decorator to define a table in the schema on the server +class ExampleTable(SpyglassMixin, dj.Manual): # Inherit SpyglassMixin class + """Table Description""" # Table docstring, one-line if possible + + definition = """ # Table comment + primary_key1 : uuid # randomized string + primary_key2 : int # integer + --- + secondary_key1 : varchar(32) # string of max length 32 + -> Subject # Foreign key reference, inherit primary key of this table + """ + + +# Model to demonstrate field aliasing with `proj` +@schema +class SubjBlinded(SpyglassMixin, dj.Manual): + """Blinded subject table.""" # Class docstring for `help()` + + definition = """ + subject_id: uuid # id + --- + -> Subject.proj(actual_id='subject_id') + """ + + @property # Static information, Table.property + def pk(self): + """Return the primary key""" # Function docstring for `help()` + return self.heading.primary_key + + @staticmethod # Basic func with no reference to self instance + def _subj_dict(subj_uuid: UUID): # Type hint for argument + """Return the subject dict""" + return {"subject_id": subj_uuid} + + @classmethod # Class, not instance. 
Table.func(), not Table().func() + def hash(cls, argument: Union[str, dict] = None): # Default value + """Example class method""" + return dj.hash.key_hash(argument) + + def blind_subjects(self, restriction: Union[str, dict]): # Union is "or" + """Import all subjects selected by the restriction""" + insert_keys = [ + { + **self._subj_dict(self.hash(key)), + "actual_id": key["subject_id"], + } + for key in (Subject & restriction).fetch("KEY") + ] + self.insert(insert_keys, skip_duplicates=True) + + def return_subj(self, key: str): + """Return the entry in subject table""" + if isinstance(key, dict): # get rid of extra values + key = key["subject_id"] + key = self._subj_dict(key) + actual_ids = (self & key).fetch("actual_id") + ret = [{"subject_id": actual_id} for actual_id in actual_ids] + return ret[0] if len(ret) == 1 else ret + + +@schema +class MyParams(SpyglassMixin, dj.Lookup): # Lookup allows for default values + """Parameter table.""" + + definition = """ + param_name: varchar(32) + --- + params: blob + """ + contents = [ # Default values as list of tuples + ["example1", {"A": 1, "B": 2}], + ["example2", {"A": 3, "B": 4}], + ] + + @classmethod + def insert_default(cls): # Not req for dj.Lookup, but Spyglass convention + """Insert default values.""" # skip_duplicates prevents errors + cls().insert(rows=cls.contents, skip_duplicates=True) + + +@schema +class MyAnalysisSelection(SpyglassMixin, dj.Manual): + """Selection table.""" # Pair subjects and params for computation + + definition = """ + -> SubjBlinded + -> MyParams + """ + + def insert_all(self, param_name="example1"): # Optional helper function + """Insert all subjects with given param name""" + self.insert( + [ + {**subj_key, "param_name": param_name} + for subj_key in SubjBlinded.fetch("KEY") + ], + skip_duplicates=True, + ) + + +@schema +class MyAnalysis(SpyglassMixin, dj.Computed): + """Analysis table.""" + + # One or more foreign keys, no manual input + definition = """ + -> MyAnalysisSelection + 
""" + + class MyPart(SpyglassMixin, dj.Part): + """Part table.""" + + definition = """ + -> MyAnalysis + --- + result: int + """ + + def make(self, key): + # Prepare for computation + this_subj = SubjBlinded().return_subj(key["subject_id"]) + param_key = {"param_name": key["param_name"]} + these_param = (MyParams & param_key).fetch1("params") + + # Perform computation. + # Ideally, all data is linked with foreign keys, but not enforced + for pos_obj in RawPosition.PosObject * (Subject & this_subj): + dataframe = (RawPosition.PosObject & pos_obj).fetch1_dataframe() + result = process_df(dataframe, **these_param) + + part_inserts = [] # Prepare inserts, to minimize insert calls + for _ in range(10): + result += random.randint(0, 100) + part_inserts.append(dict(key, result=result)) + + self.insert1(key) # Insert into 'master' first, then all parts + self.MyPart().insert(rows=part_inserts, skip_duplicates=True) +``` + +
+ +## Front matter + +At the beginning of the schema file, you'll find ... + +- Script docstring +- Imports + - Aliased imports + - Package imports + - Individual imports + - Relative imports +- Schema declaration + +```python +"""Schema example for custom pipelines + +Note: `noqa: F401` is a comment that tells linters to ignore the fact that +`Subject` seems unused in the file. If this table is only used in a table +definition string, the linter will not recognize it as being used. +""" + +import random # Package import +from typing import Union # Individual class import +from uuid import UUID + +import datajoint as dj # Aliased package import +from custom_package.utils import process_df, schema_prefix # custom functions +from spyglass.common import RawPosition, Subject # noqa: F401 +from spyglass.utils import SpyglassMixin # Additional Spyglass features + +schema = dj.schema(schema_prefix + "_example") # schema name from string +``` + +- The `schema` variable determines the name of the schema in the database. +- Existing schema prefixes (e.g., `common`) should not be added to without + discussion with the Spyglass team. +- Database admins may be interested in limiting privileges on a per-prefix + basis. For example, Frank Lab members use ... +- Their respective usernames for solo work +- Project-specific prefixes for shared work. + +## Table syntax + +Each table is defined as a Python class, with a `definition` attribute that +contains the SQL-like table definition. + +### Class inheritance + +The parentheses in the class definition indicate what the class inherits from. + +This table is ... + +- A `SpyglassMixin` class, which provides a number of useful methods specific to + Spyglass as discussed in the [mixin article](../Features/Mixin.md). +- A DataJoint `Manual` table, which is a table that is manually populated.
+ +```python +@schema # Decorator to define a table in the schema on the server +class ExampleTable(SpyglassMixin, dj.Manual): # Inherit SpyglassMixin class + pass +``` + +### Table types + +- [DataJoint types](https://datajoint.com/docs/core/datajoint-python/0.14/design/tables/tiers/): + - `Manual` tables are manually populated. + - `Lookup` tables can be populated on declaration, and rarely change. + - `Computed` tables are populated by a method that runs computations on + upstream entries. + - `Imported` tables are populated by a method that imports data from another + source. + - `Part` tables are used to store data that is conceptually part of another + table. +- [Spyglass conceptual types](./TableTypes.md): + - Optional upstream Data tables from a previous pipeline. + - Parameter tables (often `dj.Lookup`) store parameters for analysis. + - Selection tables store pairings of parameters and data to be analyzed. + - Compute tables (often `dj.Computed`) store the results of analysis. + - Merge tables combine data from multiple pipeline versions. + +### Definitions + +Each table can have a docstring that describes the table, and must have a +`definition` attribute that contains the SQL-like table definition. + +- `#` comments are used to describe the table and its columns. + +- `---` separates the primary key columns from the data columns. + +- `field : datatype` defines a column using a + [SQL datatype](https://datajoint.com/docs/core/datajoint-python/0.14/design/tables/attributes/) + + +- `->` indicates a foreign key reference to another table.
+ +```python +@schema # Decorator to define a table in the schema on the server +class ExampleTable(SpyglassMixin, dj.Manual): # Inherit SpyglassMixin class + """Table Description""" # Table docstring, one-line if possible + + definition = """ # Table comment + primary_key1 : uuid # randomized string + primary_key2 : int # integer + --- + secondary_key1 : varchar(32) # string of max length 32 + -> Subject # Foreign key reference, inherit primary key of this table + """ +``` + +### Methods + +Many Spyglass tables have methods that provide functionality for the pipeline. + +Check out our [API documentation](../api/index.md) for a full list of available +methods. + +This example models subject blinding to demonstrate ... + +- An aliased foreign key in the definition, using `proj` to rename the field. +- A static property that returns the primary key. +- A static method that returns a dictionary of subject information. +- A class method that hashes an argument. +- An instance method that self-inserts subjects based on a restriction. +- An instance method that returns the unblinded subject information. + +```python +# Model to demonstrate field aliasing with `proj` +@schema +class SubjBlinded(SpyglassMixin, dj.Manual): + """Blinded subject table.""" # Class docstring for `help()` + + definition = """ + subject_id: uuid # id + --- + -> Subject.proj(actual_id='subject_id') + """ + + @property # Static information, Table.property + def pk(self): + """Return the primary key""" # Function docstring for `help()` + return self.heading.primary_key + + @staticmethod # Basic func with no reference to self instance + def _subj_dict(subj_uuid: UUID): # Type hint for argument + """Return the subject dict""" + return {"subject_id": subj_uuid} + + @classmethod # Class, not instance.
Table.func(), not Table().func() + def hash(cls, argument: Union[str, dict] = None): # Default value + """Example class method""" + return dj.hash.key_hash(argument) + + def blind_subjects(self, restriction: Union[str, dict]): # Union is "or" + """Import all subjects selected by the restriction""" + insert_keys = [ + { + **self._subj_dict(self.hash(key)), + "actual_id": key["subject_id"], + } + for key in (Subject & restriction).fetch("KEY") + ] + self.insert(insert_keys, skip_duplicates=True) + + def return_subj(self, key: str): + """Return the entry in subject table""" + if isinstance(key, dict): # get rid of extra values + key = key["subject_id"] + key = self._subj_dict(key) + actual_ids = (self & key).fetch("actual_id") + ret = [{"subject_id": actual_id} for actual_id in actual_ids] + return ret[0] if len(ret) == 1 else ret +``` + +### Example Table Types + +#### Params Table + +This stores the set of values that may be used in an analysis. For analyses that +are unlikely to change, consider specifying all parameters in the table's +secondary keys. For analyses that may have different parameters, or when +depending on outside packages, consider a `blob` datatype that can store a +python dictionary. + +```python +@schema +class MyParams(SpyglassMixin, dj.Lookup): # Lookup allows for default values + """Parameter table.""" + + definition = """ + param_name: varchar(32) + --- + params: blob + """ + contents = [ # Default values as list of tuples + ["example1", {"A": 1, "B": 2}], + ["example2", {"A": 3, "B": 4}], + ] + + @classmethod + def insert_default(cls): # Not req for dj.Lookup, but Spyglass convention + """Insert default values.""" # skip_duplicates prevents errors + cls().insert(rows=cls.contents, skip_duplicates=True) +``` + +#### Selection Table + +This is the staging area to pair sessions with parameter sets. Depending on what +is inserted, you might pair the same subject with different parameter sets, or +different subjects with the same parameter set.
+ +```python +@schema +class MyAnalysisSelection(SpyglassMixin, dj.Manual): + """Selection table.""" # Pair subjects and params for computation + + definition = """ + -> SubjBlinded + -> MyParams + """ + + def insert_all(self, param_name="example1"): # Optional helper function + """Insert all subjects with given param name""" + self.insert( + [ + {**subj_key, "param_name": param_name} + for subj_key in SubjBlinded.fetch("KEY") + ], + skip_duplicates=True, + ) +``` + +#### Compute Table + +This is how processing steps are paired with data entry. By running +`MyAnalysis().populate()`, the `make` method is called for each foreign key +pairing in the selection table. The `make` method should end in one or more +inserts into the compute table. + +```python +@schema +class MyAnalysis(SpyglassMixin, dj.Computed): + """Analysis table.""" + + # One or more foreign keys, no manual input + definition = """ + -> MyAnalysisSelection + """ + + class MyPart(SpyglassMixin, dj.Part): + """Part table.""" + + definition = """ + -> MyAnalysis + --- + result: int + """ + + def make(self, key): + # Prepare for computation + this_subj = SubjBlinded().return_subj(key["subject_id"]) + param_key = {"param_name": key["param_name"]} + these_param = (MyParams & param_key).fetch1("params") + + # Perform computation.
+ # Ideally, all data is linked with foreign keys, but not enforced + for pos_obj in RawPosition.PosObject * (Subject & this_subj): + dataframe = (RawPosition.PosObject & pos_obj).fetch1_dataframe() + result = process_df(dataframe, **these_param) + + part_inserts = [] # Prepare inserts, to minimize insert calls + for _ in range(10): + result += random.randint(0, 100) + part_inserts.append(dict(key, result=result)) + + self.insert1(key) # Insert into 'master' first, then all parts + self.MyPart().insert(rows=part_inserts, skip_duplicates=True) +``` + +To see how tables of a given schema relate to one another, use a +[schema diagram](https://datajoint.com/docs/core/datajoint-python/0.14/design/diagrams/) diff --git a/docs/src/ForDevelopers/TableTypes.md b/docs/src/ForDevelopers/TableTypes.md new file mode 100644 index 000000000..5a040f94f --- /dev/null +++ b/docs/src/ForDevelopers/TableTypes.md @@ -0,0 +1,108 @@ +# Table Types + +Spyglass uses DataJoint's default +[table tiers](https://datajoint.com/docs/core/datajoint-python/0.14/design/tables/tiers/). + +By convention, an individual pipeline has one or more of these table types: + +- Common/Multi-pipeline table +- NWB ingestion table +- Parameters table +- Selection table +- Data table +- Merge Table (see also [stand-alone doc](../Features/Merge.md)) + +## Common/Multi-pipeline + +Tables shared across multiple pipelines for shared data types. + +- Naming convention: None +- Data tier: `dj.Manual` +- Examples: `IntervalList` (time interval for any analysis), `AnalysisNwbfile` + (analysis NWB files) + +_Note_: Because these are stand-alone tables not part of the dependency +structure, developers should include enough information to link entries back to +the pipeline where the data is used. + +## NWB ingestion + +Automatically populated when an NWB file is ingested (i.e., `dj.Imported`) to +keep track of object hashes (i.e., `object_id`) in the NWB file.
All such tables +should be included in the `make` method of `Session`. + +- Naming convention: None +- Data tier: `dj.Imported` +- Primary key: foreign key from `Session` +- Non-primary key: `object_id`, the unique hash of an object in the NWB file. +- Examples: `Raw`, `Institution`, etc. +- Required methods: + - `make`: must read information from an NWB file and insert it to the table. + - `fetch_nwb`: retrieve the data specified by the object ID. + +## Parameters + +Stores the set of values that may be used in an analysis. + +- Naming convention: end with `Parameters` or `Params` +- Data tier: `dj.Manual`, or `dj.Lookup` +- Primary key: `{pipeline}_params_name`, `varchar` +- Non-primary key: `{pipeline}_params`, `blob` - dict of parameters +- Examples: `RippleParameters`, `DLCModelParams` +- Possible method: if `dj.Manual`, include `insert_default` + +_Notes_: Some early instances of Parameter tables (a) used non-primary keys for +each individual parameter, and (b) use the Manual rather than Lookup tier, +requiring a class method to insert defaults. + +## Selection + +A staging area to pair sessions with parameter sets, allowing us to be selective +in the analyses we run. It may not make sense to pair every paramset with every +session. + +- Naming convention: end with `Selection` +- Data tier: `dj.Manual` +- Primary key(s): Foreign key references to + - one or more NWB or data tables + - optionally, one or more parameter tables +- Non-primary key: None +- Examples: `MetricSelection`, `LFPSelection` + +It is possible for a Selection table to collect information from more than one +Parameter table. For example, the Selection table for spike sorting holds +information about both the interval (`SortInterval`) and the group of electrodes +(`SortGroup`) to be sorted. + +## Data + +The output of processing steps associated with a selection table. Has a `make` +method that carries out the computation specified in the Selection table when +`populate` is called. 
+ +- Naming convention: None +- Data tier: `dj.Computed` +- Primary key: Foreign key reference to a Selection table. +- Non-primary key: `analysis_file_name` inherited from `AnalysisNwbfile` table + (i.e., name of the analysis NWB file that will hold the output of the + computation). +- Required method, `make`: carries out the computation and inserts a new entry; + must also create an analysis NWB file and insert it to the `AnalysisNwbfile` + table. Note that this method is never called directly; it is called via + `populate`. Multiple entries can be run in parallel when called with + `reserve_jobs=True`. +- Example: `QualityMetrics`, `LFPV1` + +## Merge + +Following a convention outlined in [a dedicated doc](../Features/Merge.md), +merges the output of different pipelines dedicated to the same modality as part +tables (e.g., common LFP, LFP v1, imported LFP) to permit unified downstream +processing. + +- Naming convention: `{Pipeline}Output` +- Data tier: custom `_Merge` class +- Primary key: `merge_id`, `uuid` +- Non-primary key: `source`, `varchar` table name associated with that entry +- Required methods: None - see custom class methods with `merge_` prefix +- Example: `LFPOutput`, `PositionOutput` diff --git a/docs/src/ForDevelopers/UsingNWB.md b/docs/src/ForDevelopers/UsingNWB.md new file mode 100644 index 000000000..3f68f930e --- /dev/null +++ b/docs/src/ForDevelopers/UsingNWB.md @@ -0,0 +1,269 @@ +# Using NWB + +This article explains how to use the NWB format in Spyglass. It covers the +naming conventions, storage locations, and the relationships between NWB files +and other tables in the database. + +## NWB files + +NWB files contain everything about the experiment and form the starting point of +all analyses.
+ +- Naming: `{animal name}YYYYMMDD.nwb` +- Storage: + - On disk, directory identified by `settings.py` as `raw_dir` (e.g., + `/stelmo/nwb/raw`) + - In database, in the `Nwbfile` table +- Copies: + - made with an underscore `{animal name}YYYYMMDD_.nwb` + - stored in the same `raw_dir` + - contain pointers to objects in original file + - permit adding new parts to the NWB file without risk of corrupting the + original data + +## Analysis files + +Hold the results of intermediate steps in the analysis. + +- Naming: `{animal name}YYYYMMDD_{10-character random string}.nwb` +- Storage: + - On disk, directory identified by `settings.py` as `analysis_dir` (e.g., + `/stelmo/nwb/analysis`). Items are further sorted into folders matching + original NWB file name + - In database, in the `AnalysisNwbfile` table. +- Examples: filtered recordings, spike times of putative units after sorting, or + waveform snippets. + +_Note_: Because NWB files and analysis files exist both on disk and listed in +tables, these can become out of sync. You can 'equalize' the database table +lists and the set of files on disk by running `cleanup` method, which deletes +any files not listed in the table from disk. + +## Reading and writing recordings + +Recordings start out as an NWB file, which is opened as a +`NwbRecordingExtractor`, a class in `spikeinterface`. When using `sortingview` +for visualizing the results of spike sorting, this recording is saved again in +HDF5 format. This duplication should be resolved in the future. + +## Naming convention + +The following objects should be uniquely named. + +- _Recordings_: Underscore-separated concatenations of uniquely defining + features, + `NWBFileName_IntervalName_ElectrodeGroupName_PreprocessingParamsName`. +- _SpikeSorting_: Adds `SpikeSorter_SorterParamName` to the name of the + recording. +- _Waveforms_: Adds `_WaveformParamName` to the name of the sorting. +- _Quality metrics_: Adds `_MetricParamName` to the name of the waveform. 
+- _Analysis NWB files_: + `NWBFileName_IntervalName_ElectrodeGroupName_PreprocessingParamsName.nwb` +- Each recording and sorting is given truncated UUID strings as part of + concatenations. + +Following broader Python conventions, a method that will not be +explicitly called by the user should start with `_` + +## Time + +The `IntervalList` table stores all time intervals in the following format: +`[start_time, stop_time]`, which represents a contiguous time of valid data. +These are used to exclude any invalid timepoints, such as missing data from a +faulty connection. + +- Intervals can be nested for a set of disjoint intervals. +- Some recordings have explicit + [PTP timestamps](https://en.wikipedia.org/wiki/Precision_Time_Protocol) + associated with each sample. Some older recordings are missing PTP times, + and times must be inferred from the TTL pulses from the camera. + +## Object-Table mappings + +The following tables highlight the correspondence between NWB objects and +Spyglass tables/fields and should be a useful reference for developers looking +to adapt existing NWB files for Spyglass ingestion. + +Please contact the developers if you have any questions or need help with +adapting your NWB files for use with Spyglass, especially items marked with +'TODO' in the tables below. + + NWBfile Location: nwbf
Object type: pynwb.file.NWBFile
+ +| Spyglass Table | Key | NWBfile Location | Config option | Notes | +| :------------------- | :-----------------------: | -----------------------------: | --------------------------------------------: | ---------------------------: | +| Institution | institution_name | nwbf.institution | config\["Institution"\]\["institution_name"\] | str | +| Session | institution_name | nwbf.institution | config\["Institution"\]\["institution_name"\] | str | +| Lab | lab_name | nwbf.lab | config\["Lab"\]\["lab_name"\] | str | +| Session | lab_name | nwbf.lab | config\["Lab"\]\["lab_name"\] | str | +| LabMember | lab_member_name | nwbf.experimenter | config\["LabMember"\]\["lab_member_name"\] | str("last_name, first_name") | +| Session.Experimenter | lab_member_name | nwbf.experimenter | config\["LabMember"\]\["lab_member_name"\] | str("last_name, first_name") | +| Session | session_id | nwbf.session_id | XXX | | +| Session | session_description | nwbf.session_description | XXX | | +| Session | session_start_time | nwbf.session_start_time | XXX | | +| Session | timestamps_reference_time | nwbf.timestamps_reference_time | XXX | | +| Session | experiment_description | nwbf.experiment_description | XXX | | + + NWBfile Location: nwbf.subject
Object type: pynwb.file.Subject
+ +| Spyglass Table | Key | NWBfile Location | Config option | Notes | +| :------------- | :---------: | -----------------------: | -----------------------------------: | -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| Subject | subject_id | nwbf.subject.subject_id | config\["Subject"\]\["subject_id"\] | | +| Subject | age | nwbf.subject.age | config\["Subject"\]\["age"\] | Dandi requires age must be in ISO 8601 format, e.g. "P70D" for 70 days, or, if it is a range, must be "\[lower\]/\[upper\]", e.g. "P10W/P12W", which means "between 10 and 12 weeks" | +| Subject | description | nwbf.subject.description | config\["Subject"\]\["description"\] | | +| Subject | genotype | nwbf.subject.genotype | config\["Subject"\]\["genotype"\] | | +| Subject | species | nwbf.subject.species | config\["Subject"\]\["species"\] | Dandi upload requires species either be in Latin binomial form (e.g., 'Mus musculus' and 'Homo sapiens') or be a NCBI taxonomy link | +| Subject | sex | nwbf.subject.sex | config\["Subject"\]\["sex"\] | single character identifier (e.g. "F", "M", "U","O") | +| Session | subject_id | nwbf.subject.subject_id | config\["Subject"\]\["subject_id"\] | str("animal_name") | + + NWBfile Location: nwbf.devices
Object type: +ndx_franklab_novela.DataAcqDevice
+ +| Spyglass Table | Key | NWBfile Location | Config option | Notes | +| :----------------------------- | :-------------------------------: | -------------------------------------: | -----------------------------------------------------------------------: | ----: | +| DataAcquisitionDevice | data_acquisition_device_name | nwbf.devices.\<\*DataAcqDevice>.name | config\["DataAcquisitionDevice"\]\["data_acquisition_device_name"\] | | +| DataAcquisitionDevice | adc_circuit | nwbf.devices.\<\*DataAcqDevice>.name | config\["DataAcquisitionDevice"\]\["data_acquisition_device_name"\] | | +| DataAcquisitionDeviceSystem | data_acquisition_device_system | nwbf.devices.\<\*DataAcqDevice>.system | config\["DataAcquisitionDevice"\]\["data_acquisition_device_system"\] | | +| DataAcquisitionDeviceAmplifier | data_acquisition_device_amplifier | nwbf.devices.\<\*DataAcqDevice>.system | config\["DataAcquisitionDevice"\]\["data_acquisition_device_amplifier"\] | | + + NWBfile Location: nwbf.devices
Object type: +ndx_franklab_novela.CameraDevice
+ +| Spyglass Table | Key | NWBfile Location | Config option | Notes | +| :------------- | :-----------------: | ----------------------------------------------: | ------------------------------------------------------: | ----: | +| CameraDevice | camera_id | nwbf.devices.\<\*CameraDevice>.camera_id | config\["CameraDevice"\]\[index\]\["camera_id"\] | int | +| CameraDevice | camera_name | nwbf.devices.\<\*CameraDevice>.camera_name | config\["CameraDevice"\]\[index\]\["camera_name"\] | str | +| CameraDevice | camera_manufacturer | nwbf.devices.\<\*CameraDevice>.manufacturer | config\["CameraDevice"\]\[index\]\["manufacturer"\] | str | +| CameraDevice | model | nwbf.devices.\<\*CameraDevice>.model | config\["CameraDevice"\]\[index\]\["model"\] | str | +| CameraDevice | lens | nwbf.devices.\<\*CameraDevice>.lens | config\["CameraDevice"\]\[index\]\["lens"\] | str | +| CameraDevice | meters_per_pixel | nwbf.devices.\<\*CameraDevice>.meters_per_pixel | config\["CameraDevice"\]\[index\]\["meters_per_pixel"\] | str | + + NWBfile Location: nwbf.devices
Object type: ndx_franklab_novela.Probe +
+ +| Spyglass Table | Key | NWBfile Location | Config option | Notes | +| :------------- | :---------------: | ----------------------------------------: | -----------------------------------------: | ----: | +| Probe | probe_type | nwbf.devices.\<\*Probe>.probe_type | config\["Probe"\]\[index\]\["probe_type"\] | str | +| Probe | probe_id | nwbf.devices.\<\*Probe>.probe_type | XXX | str | +| Probe | manufacturer | nwbf.devices.\<\*Probe>.manufacturer | XXX | str | +| Probe | probe_description | nwbf.devices.\<\*Probe>.probe_description | XXX | str | +| Probe | num_shanks | nwbf.devices.\<\*Probe>.num_shanks | XXX | int | + + NWBfile Location: nwbf.devices.\<\*Probe>.\<\*Shank>
Object type: +ndx_franklab_novela.Shank
+ +| Spyglass Table | Key | NWBfile Location | Config option | Notes | +| :------------- | :---------: | ---------------------------------------------: | ------------: | ----: | +| Probe.Shank | probe_shank | nwbf.devices.\<\*Probe>.\<\*Shank>.probe_shank | XXX | int | + + NWBfile Location: nwbf.devices.\<\*Probe>.\<\*Shank>.\<\*Electrode>
+Object type: ndx_franklab_novela.Electrode
+ +| Spyglass Table | Key | NWBfile Location | Config option | Notes | +| :-------------- | :----------: | -------------------------------------------------------------: | ------------: | ----: | +| Probe.Electrode | probe_shank | nwbf.devices.\<\*Probe>.\<\*Shank>.probe_shank | XXX | int | +| Probe.Electrode | contact_size | nwbf.devices.\<\*Probe>.\<\*Shank>.\<\*Electrode>.contact_size | XXX | float | +| Probe.Electrode | rel_x | nwbf.devices.\<\*Probe>.\<\*Shank>.\<\*Electrode>.rel_x | XXX | float | + + NWBfile Location: nwbf.epochs
Object type: pynwb.epoch.TimeIntervals +
+ +| Spyglass Table | Key | NWBfile Location | Config option | Notes | +| :-------------------- | :----------------: | ------------------------------------------------------------------: | ------------: | ----: | +| IntervalList (epochs) | interval_list_name | nwbf.epochs.\[index\].tags\[0\] | | str | +| IntervalList (epochs) | valid_times | \[nwbf.epoch.\[index\].start_time, nwbf.epoch.\[index\].stop_time\] | | float | + + NWBfile Location: nwbf.electrode_groups + +| Spyglass Table | Key | NWBfile Location | Config option | Notes | +| :------------- | :---------------: | ------------------------------------------------: | ------------: | ----------------------------------------------------------------------------------------------------------------------------------: | +| BrainRegion | region_name | nwbf.electrode_groups.\[index\].location | | str | +| ElectrodeGroup | description | nwbf.electrode_groups.\[index\].description | | str | +| ElectrodeGroup | probe_id | nwbf.electrode_groups.\[index\].device.probe_type | | + device must be of type ndx_franklab_novela.Probe | +| ElectrodeGroup | target_hemisphere | nwbf.electrode_groups.\[index\].targeted_x | | + electrode group must be of type ndx_franklab_novela.NwbElectrodeGroup. target_hemisphere = "Right" if targeted_x >= 0 else "Left" | + + NWBfile Location: nwbf.acquisition
Object type: +pynwb.ecephys.ElectricalSeries
+ +| Spyglass Table | Key | NWBfile Location | Config option | Notes | +| :----------------- | :----------------: | ---------------------------------------------------: | ------------: | ----: | +| Raw | sampling_rate | eseries.rate else, estimated from eseries.timestamps | | float | +| IntervalList (raw) | interval_list_name | "raw data valid times" | | str | +| IntervalList (raw) | valid_times | get_valid_intervals(eseries.timestamps, ...) | | | + + NWBfile Location: nwbf.processing.sample_count
Object type: +pynwb.base.TimeSeries
+ +| Spyglass Table | Key | NWBfile Location | Config option | Notes | +| :------------- | :-----------------: | ---------------------------: | ------------: | ----: | +| SampleCount | sample_count_obj_id | nwbf.processing.sample_count | | | + + NWBfile Location: nwbf.processing.behavior.behavioralEvents
Object +type: pynwb.base.TimeSeries
+ +| Spyglass Table | Key | NWBfile Location | Config option | Notes | +| :------------- | :------------: | --------------------------------------------------: | ------------: | ----: | +| DIOEvents | dio_event_name | nwbf.processing.behavior.behavioralEvents.name | | | +| DIOEvents | dio_obj_id | nwbf.processing.behavior.behavioralEvents.object_id | | | + + NWBfile Location: nwbf.processing.tasks
Object type: +hdmf.common.table.DynamicTable
+ +| Spyglass Table | Key | NWBfile Location | Config option | Notes | +| :------------- | :--------------: | -----------------------------------------------: | ------------: | ----: | +| Task | task_name | nwbf.processing.tasks.\[index\].name | | | +| Task | task_description | nwbf.processing.\[index\].tasks.description | | | +| TaskEpoch | task_name | nwbf.processing.\[index\].tasks.name | | | +| TaskEpoch | camera_names | nwbf.processing.\[index\].tasks.camera_id | | | +| TaskEpoch | task_environment | nwbf.processing.\[index\].tasks.task_environment | | | + + NWBfile Location: nwbf.units
Object type: pynwb.misc.Units
+ +| Spyglass Table | Key | NWBfile Location | Config option | Notes | +| :------------------- | :-------: | -------------------: | ------------: | ----: | +| ImportedSpikeSorting | object_id | nwbf.units.object_id | | | + + NWBfile Location: nwbf.electrodes
Object type: +hdmf.common.table.DynamicTable
+ +| Spyglass Table | Key | NWBfile Location | Config option | Notes | +| :------------- | :--------------------------: | -------------------------------------------------------------------------------------: | ---------------------------------------------------------------: | ---------------------------------------------------------------------------: | +| Electrode | electrode_id | nwbf.electrodes.\[index\] (the enumerated index number) | config\["Electrode"\]\[index\]\["electrode_id"\] | int | +| Electrode | name | str(nwbf.electrodes.\[index\]) nwbf.electrodes.\[index\] (the enumerated index number) | config\["Electrode"\]\[index\]\["name"\] | str | +| Electrode | group_name | nwbf.electrodes.\[index\].group_name | config\["Electrode"\]\[index\]\["group_name"\] | int | +| Electrode | x | nwbf.electrodes.\[index\].x | config\["Electrode"\]\[index\]\["x"\] | int | +| Electrode | y | nwbf.electrodes.\[index\].y | config\["Electrode"\]\[index\]\["y"\] | int | +| Electrode | z | nwbf.electrodes.\[index\].z | config\["Electrode"\]\[index\]\["z"\] | int | +| Electrode | filtering | nwbf.electrodes.\[index\].filtering | config\["Electrode"\]\[index\]\["filtering"\] | int | +| Electrode | impedance | nwbf.electrodes.\[index\].impedance | config\["Electrode"\]\[index\]\["impedance"\] | int | +| Electrode | probe_id | nwbf.electrodes.\[index\].group.device.probe_type | config\["Electrode"\]\[index\]\["probe_id"\] | if type(nwbf.electrodes.\[index\].group.device) is ndx_franklab_novela.Probe | +| Electrode | probe_shank | nwbf.electrodes.\[index\].group.device.probe_shank | config\["Electrode"\]\[index\]\["probe_shank"\] | if type(nwbf.electrodes.\[index\].group.device) is ndx_franklab_novela.Probe | +| Electrode | probe_electrode | nwbf.electrodes.\[index\].group.device.probe_electrode | config\["Electrode"\]\[index\]\["probe_electrode"\] | if type(nwbf.electrodes.\[index\].group.device) is ndx_franklab_novela.Probe | +| Electrode | bad_channel | 
nwbf.electrodes.\[index\].group.device.bad_channel | config\["Electrode"\]\[index\]\["bad_channel"\] | if type(nwbf.electrodes.\[index\].group.device) is ndx_franklab_novela.Probe | +| Electrode | original_reference_electrode | nwbf.electrodes.\[index\].group.device.ref_elect_id | config\["Electrode"\]\[index\]\["original_reference_electrode"\] | if type(nwbf.electrodes.\[index\].group.device) is ndx_franklab_novela.Probe | + + NWBfile Location: nwbf.processing.behavior.position
Object type: +(pynwb.behavior.Position).(pynwb.behavior.SpatialSeries)
+ +| Spyglass Table | Key | NWBfile Location | Config option | Notes | +| :--------------------------- | :--------------------: | -------------------------------------------------------------------------------: | ------------: | --------------------: | +| IntervalList (position) | interval_list_name | "pos {index} valid times" | | | +| IntervalList (position) | valid_times | get_valid_intervals(nwbf.processing.behavior.position.\[index\].timestamps, ...) | | | +| PositionSource | source | "trodes" | | TODO: infer from file | +| PositionSource | interval_list_name | See: IntervalList (position) | | | +| PositionSource.SpatialSeries | id | int(nwbf.processing.behavior.position.\[index\]) (the enumerated index number) | | | +| RawPosition.PosObject | raw_position_object_id | nwbf.processing.behavior.position.\[index\].object_id | | | + + NWBfile Location: nwbf.processing.video_files.video
Object type: +pynwb.image.ImageSeries
+ +| Spyglass Table | Key | NWBfile Location | Config option | Notes | +| :------------- | :---------: | ------------------------------------------------------: | ------------: | ----: | +| VideoFile | camera_name | nwbf.processing.video_files.video.\[index\].camera_name | | | + + NWBfile Location: nwbf.processing.associated_files
Object type: +ndx_franklab_novela.AssociatedFiles
+ +| Spyglass Table | Key | NWBfile Location | Config option | Notes | +| :-------------- | :---: | -----------------------------------------------------: | ------------: | --------------------------------------------------------------------------------------: | +| StateScriptFile | epoch | nwbf.processing.associated_files.\[index\].task_epochs | | type(nwbf.processing.associated_files.\[index\]) == ndx_franklab_novela.AssociatedFiles | diff --git a/docs/src/ForDevelopers/index.md b/docs/src/ForDevelopers/index.md new file mode 100644 index 000000000..6128aa10e --- /dev/null +++ b/docs/src/ForDevelopers/index.md @@ -0,0 +1,37 @@ +# For Developers + +This folder covers the process of developing new pipelines and features to be +used with Spyglass. + +## Contributing + +If you're looking to contribute to the project itself, either by adding a new +pipeline or improving an existing one, please review the article on +[contributing](./Contribute.md). + +Any computation that might be useful for more than one project is a good +candidate for contribution. If you're not sure, feel free to +[open an issue](https://github.com/LorenFrankLab/spyglass/issues/new) to +discuss. + +## Management + +If you're looking to declare and manage your own instance of Spyglass, please +review the article on [database management](./Management.md). + +## Custom + +This folder also contains a number of articles on understanding pipelines in +order to develop your own. + +- [Code for Reuse](./Reuse.md) discusses good practice for writing readable and + reusable code in Python. +- [Table Types](./TableTypes.md) explains the different table motifs in Spyglass + and how to use them. +- [Schema design](./Schema.md) explains the anatomy of a Spyglass schema and + gives a model for writing your version of each of the types of tables. +- [Using NWB](./UsingNWB.md) explains how to use the NWB format in Spyglass. 
+ +If you'd like help in developing a new pipeline, please reach out to the +Spyglass team via our +[discussion board](https://github.com/LorenFrankLab/spyglass/discussions). diff --git a/docs/src/api/index.md b/docs/src/api/index.md index d616c0757..1d23e35b4 100644 --- a/docs/src/api/index.md +++ b/docs/src/api/index.md @@ -4,24 +4,19 @@ The files in this directory are automatically generated from the docstrings in the source code. They include descriptions of each of the DataJoint tables and other classes/methods within Spyglass. -These docs are updated any time a new release is made or a tag is pushed to the -repository. +## Directories - +- `cli`: See README.md at `spyglass/examples/cli/README.md` +- `common`: Data insertion point for all pipelines. +- `data_import`: Data insertion tools. +- `decoding`: Decoding animal position from spiking data. +- `figurl_views`: Tools for visualizing data. +- `lfp`: Local field potential processing. +- `lock`: Tables for locking files, preventing deletion. +- `position`: Tracking animal position via LEDs ('Trodes') or DeepLabCut. +- `linearization`: Linearizing position data for decoding. +- `ripple`: Detecting ripples in LFP data. +- `sharing`: Tables for data sharing via Kachery. +- `spikesorting`: Sorting spikes from raw electrophysiology data. +- `utils`: Utilities for working with DataJoint and Neurodata Without Borders + (NWB) data. diff --git a/docs/src/contribute.md b/docs/src/contribute.md deleted file mode 100644 index d34698a39..000000000 --- a/docs/src/contribute.md +++ /dev/null @@ -1,244 +0,0 @@ -# Developer notes - -Notes on how the repo / database is organized, intended for a new developer. - -## Development workflow - -New contributors should follow the -[Fork-and-Branch workflow](https://www.atlassian.com/git/tutorials/comparing-workflows/forking-workflow). -See GitHub instructions -[here](https://docs.github.com/en/get-started/quickstart/contributing-to-projects). 
- -Regular contributors may choose to follow the -[Feature Branch Workflow](https://www.atlassian.com/git/tutorials/comparing-workflows/feature-branch-workflow) -for features that will involve multiple contributors. - -## Code organization - -- Tables are grouped into schemas by topic (e.g., `common_metrics`) -- Schemas - - Are defined in a `py` pile. - - Correspond to MySQL 'databases'. - - Are organized into modules (e.g., `common`) by folders. -- The _common_ module - - In principle, contains schema that are shared across all projects. - - In practice, contains shared tables (e.g., Session) and the first draft of - schemas that have since been split into their own - modality-specific\ - modules (e.g., `lfp`) - - Should not be added to without discussion. -- A pipeline - - Refers to a set of tables used for processing data of a particular modality - (e.g., LFP, spike sorting, position tracking). - - May span multiple schema. -- For analysis that will be only useful to you, create your own schema. - -## Types of tables - -Spyglass uses DataJoint's default -[table tiers](https://datajoint.com/docs/core/datajoint-python/0.14/design/tables/tiers/). - -By convention, an individual pipeline has one or more the following table types: - -- Common/Multi-pipeline table -- NWB ingestion table -- Parameters table -- Selection table -- Data table -- Merge Table (see also [doc](./misc/merge_tables.md)) - -### Common/Multi-pipeline - -Tables shared across multiple pipelines for shared data types. - -- Naming convention: None -- Data tier: `dj.Manual` -- Examples: `IntervalList` (time interval for any analysis), `AnalysisNwbfile` - (analysis NWB files) - -_Note_: Because these are stand-alone tables not part of the dependency -structure, developers should include enough information to link entries back to -the pipeline where the data is used. 
- -### NWB ingestion - -Automatically populated when an NWB file is ingested (i.e., `dj.Imported`) to -keep track of object hashes (i.e., `object_id`) in the NWB file. All such tables -should be included in the `make` method of `Session`. - -- Naming convention: None -- Data tier: `dj.Imported` -- Primary key: foreign key from `Session` -- Non-primary key: `object_id`, the unique hash of an object in the NWB file. -- Examples: `Raw`, `Institution`, etc. -- Required methods: - - `make`: must read information from an NWB file and insert it to the table. - - `fetch_nwb`: retrieve the data specified by the object ID. - -### Parameters - -Stores the set of values that may be used in an analysis. - -- Naming convention: end with `Parameters` or `Params` -- Data tier: `dj.Manual`, or `dj.Lookup` -- Primary key: `{pipeline}_params_name`, `varchar` -- Non-primary key: `{pipeline}_params`, `blob` - dict of parameters -- Examples: `RippleParameters`, `DLCModelParams` -- Possible method: if `dj.Manual`, include `insert_default` - -_Notes_: Some early instances of Parameter tables (a) used non-primary keys for -each individual parameter, and (b) use the Manual rather than Lookup tier, -requiring a class method to insert defaults. - -### Selection - -A staging area to pair sessions with parameter sets, allowing us to be selective -in the analyses we run. It may not make sense to pair every paramset with every -session. - -- Naming convention: end with `Selection` -- Data tier: `dj.Manual` -- Primary key(s): Foreign key references to - - one or more NWB or data tables - - optionally, one or more parameter tables -- Non-primary key: None -- Examples: `MetricSelection`, `LFPSelection` - -It is possible for a Selection table to collect information from more than one -Parameter table. For example, the Selection table for spike sorting holds -information about both the interval (`SortInterval`) and the group of electrodes -(`SortGroup`) to be sorted. 
- -### Data - -The output of processing steps associated with a selection table. Has a `make` -method that carries out the computation specified in the Selection table when -`populate` is called. - -- Naming convention: None -- Data tier: `dj.Computed` -- Primary key: Foreign key reference to a Selection table. -- Non-primary key: `analysis_file_name` inherited from `AnalysisNwbfile` table - (i.e., name of the analysis NWB file that will hold the output of the - computation). -- Required methods: - - `make`: carries out the computation and insert a new entry; must also create - an analysis NWB file and insert it to the `AnalysisNwbfile` table. Note - that this method is never called directly; it is called via `populate`. - Multiple entries can be run in parallel when called with - `reserve_jobs=True`. - - `delete`: extension of the `delete` method that checks user privilege before - deleting entries as a way to prevent accidental deletion of computations - that take a long time (see below). -- Example: `QualityMetrics`, `LFPV1` - -### Merge - -Following a convention outlined in [the dedicated doc](./misc/merge_tables.md), -merges the output of different pipelines dedicated to the same modality as part -tables (e.g., common LFP, LFP v1, imported LFP) to permit unified downstream -processing. - -- Naming convention: `{Pipeline}Output` -- Data tier: custom `_Merge` class -- Primary key: `merge_id`, `uuid` -- Non-primary key: `source`, `varchar` table name associated with that entry -- Required methods: None - see custom class methods with `merge_` prefix -- Example: `LFPOutput`, `PositionOutput` - -## Integration with NWB - -### NWB files - -NWB files contain everything about the experiment and form the starting point of -all analyses. 
- -- Naming: `{animal name}YYYYMMDD.nwb` -- Storage: - - On disk, directory identified by `settings.py` as `raw_dir` (e.g., - `/stelmo/nwb/raw`) - - In database, in the `Nwbfile` table -- Copies: - - made with an underscore `{animal name}YYYYMMDD_.nwb` - - stored in the same `raw_dir` - - contain pointers to objects in original file - - permit adding new parts to the NWB file without risk of corrupting the - original data - -### Analysis files - -Hold the results of intermediate steps in the analysis. - -- Naming: `{animal name}YYYYMMDD_{10-character random string}.nwb` -- Storage: - - On disk, directory identified by `settings.py` as `analysis_dir` (e.g., - `/stelmo/nwb/analysis`). Items are further sorted into folders matching - original NWB file name - - In database, in the `AnalysisNwbfile` table. -- Examples: filtered recordings, spike times of putative units after sorting, or - waveform snippets. - -_Note_: Because NWB files and analysis files exist both on disk and listed in -tables, these can become out of sync. You can 'equalize' the database table -lists and the set of files on disk by running `cleanup` method, which deletes -any files not listed in the table from disk. - -## Reading and writing recordings - -Recordings start out as an NWB file, which is opened as a -`NwbRecordingExtractor`, a class in `spikeinterface`. When using `sortingview` -for visualizing the results of spike sorting, this recording is saved again in -HDF5 format. This duplication should be resolved in the future. - -## Naming convention - -The following objects should be uniquely named. - -- _Recordings_: Underscore-separated concatenations of uniquely defining - features, - `NWBFileName_IntervalName_ElectrodeGroupName_PreprocessingParamsName`. -- _SpikeSorting_: Adds `SpikeSorter_SorterParamName` to the name of the - recording. -- _Waveforms_: Adds `_WaveformParamName` to the name of the sorting. -- _Quality metrics_: Adds `_MetricParamName` to the name of the waveform. 
-- _Analysis NWB files_: - `NWBFileName_IntervalName_ElectrodeGroupName_PreprocessingParamsName.nwb` -- Each recording and sorting is given truncated UUID strings as part of - concatenations. - -Following broader Python conventions, methods a method that will not be -explicitly called by the user should start with `_` - -## Time - -The `IntervalList` table stores all time intervals in the following format: -`[start_time, stop_time]`, which represents a contiguous time of valid data. -These are used to exclude any invalid timepoints, such as missing data from a -faulty connection. - -- Intervals can be nested for a set of disjoint intervals. -- Some recordings have explicit - [PTP timestamps](https://en.wikipedia.org/wiki/Precision_Time_Protocol) - associated with each sample. Some older recordings are missing PTP times, - and times must be inferred from the TTL pulses from the camera. - -## Misc - -- During development, we suggest using a Docker container. See - [example](./notebooks/00_Setup.ipynb). -- `numpy` style docstrings will be interpreted by API docs. To check for - compliance, monitor the std out when building docs (see `docs/README.md`) - -## Making a release - -Spyglass follows [Semantic Versioning](https://semver.org/) with versioning of -the form `X.Y.Z` (e.g., `0.4.2`). - -1. In `CITATION.cff`, update the `version` key. -2. Make a pull request with changes. -3. After the pull request is merged, pull this merge commit and tag it with - `git tag {version}` -4. Publish the new release tag. Run `git push origin {version}`. This will - rebuild docs and push updates to PyPI. -5. Make a new - [release on GitHub](https://docs.github.com/en/repositories/releasing-projects-on-github/managing-releases-in-a-repository). 
diff --git a/docs/src/images/merge_diagram.png b/docs/src/images/merge_diagram.png index 72b0fc8c0..0829cc046 100644 Binary files a/docs/src/images/merge_diagram.png and b/docs/src/images/merge_diagram.png differ diff --git a/docs/src/images/merge_diagram_large.png b/docs/src/images/merge_diagram_large.png new file mode 100644 index 000000000..72b0fc8c0 Binary files /dev/null and b/docs/src/images/merge_diagram_large.png differ diff --git a/docs/src/index.md b/docs/src/index.md index 3f70c1c9c..3246f8224 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -6,6 +6,8 @@ reproducible analysis of neuroscience data and sharing of the results with collaborators and the broader community. +## Features + Features of Spyglass include: - **Standardized data storage** - Spyglass uses the open-source @@ -48,7 +50,7 @@ Features of Spyglass include: ## Getting Started -This site hosts both [installation instructions](./installation.md) and +This site hosts installation instructions as part of our [tutorials](./notebooks/index.md) to help you get started with Spyglass. We recommend running the notebooks yourself. They can be downloaded from GitHub [here](https://github.com/LorenFrankLab/spyglass). @@ -56,22 +58,30 @@ recommend running the notebooks yourself. They can be downloaded from GitHub ## Diving Deeper The [API Reference](./api/index.md) provides a detailed description of all the -tables and class functions in Spyglass via python docstrings. Potential -contributors should also read the [Developer Guide](./contribute.md). Those -interested in in hosting a Spyglass instance for their own data should read the -[database management guide](./misc/database_management.md). +tables and class functions in Spyglass via Python docstrings. + +To highlight some of the key features of Spyglass and some features added to +DataJoint, we have a series of articles on Spyglass +[features](./Features/index.md). 
-We have a series of additional docs under the [misc](./misc/index.md) folder -that may be helpful. Our [changelog](./CHANGELOG.md) highlights the changes that -have been made to Spyglass over time and the [copyright](./LICENSE.md) page -contains license information. +Our [developer guide](./ForDevelopers/index.md) provides an overview of +development practices for either contributing to the project itself or setting +up custom pipelines for your own analysis. + +Our [changelog](./CHANGELOG.md) highlights the changes that have been made to +Spyglass over time and the [copyright](./LICENSE.md) page contains license +information. ## Citing Spyglass -> Lee, K.H.\*, Denovellis, E.L.\*, Ly, R., Magland, J., Soules, J., Comrie, A.E., Gramling, D.P., Guidera, J.A., Nevers, R., Adenekan, P., Brozdowski, C., Bray, S., Monroe, E., Bak, J.H., Coulter, M.E., Sun, X., Tritt, A., Rübel, O., Nguyen, T., Yatsenko, D., Chu, J., Kemere, C., Garcia, S., Buccino, A., Frank, L.M., 2024. Spyglass: a data analysis framework for reproducible and shareable neuroscience research. bioRxiv. [10.1101/2024.01.25.577295](https://doi.org/10.1101/2024.01.25.577295 ). +> Lee, K.H.\*, Denovellis, E.L.\*, Ly, R., Magland, J., Soules, J., Comrie, +> A.E., Gramling, D.P., Guidera, J.A., Nevers, R., Adenekan, P., Brozdowski, C., +> Bray, S., Monroe, E., Bak, J.H., Coulter, M.E., Sun, X., Tritt, A., Rübel, O., +> Nguyen, T., Yatsenko, D., Chu, J., Kemere, C., Garcia, S., Buccino, A., Frank, +> L.M., 2024. Spyglass: a data analysis framework for reproducible and shareable +> neuroscience research. bioRxiv. +> [10.1101/2024.01.25.577295](https://doi.org/10.1101/2024.01.25.577295). *\* Equal contribution* See paper related code [here](https://github.com/LorenFrankLab/spyglass-paper). 
- - diff --git a/docs/src/installation.md b/docs/src/installation.md deleted file mode 100644 index d588d2daf..000000000 --- a/docs/src/installation.md +++ /dev/null @@ -1,127 +0,0 @@ -# Installation - -_Note:_ Developers, or those who wish to add features or otherwise work on the -codebase should follow the same steps below, but install Spyglass as editable -with the `-e` flag: `pip install -e /path/to/spyglass` - -## Basic Installation - -For basic installation steps, see the -[Setup notebook](./notebooks/00_Setup.ipynb) 'local installation' section, -including python, mamba (for managing a -[virtual environment](https://en.wikipedia.org/wiki/Virtual_environment_software)), -VSCode, Jupyter, and git. This notebook also covers -[database access](#database-access). - -## Additional Packages - -Some pipelines require installation of additional packages. - -The spike sorting pipeline relies on `spikeinterface` and optionally -`mountainsort4`. - -```bash -pip install spikeinterface[full,widgets] -pip install mountainsort4 -``` - -__WARNING:__ If you are on an M1 Mac, you need to install `pyfftw` via `conda` -BEFORE installing `ghostipy`: - -```bash -conda install -c conda-forge pyfftw -``` - -The LFP pipeline uses `ghostipy`: - -```bash -pip install ghostipy -``` - -## Database access - -For basic installation steps, see the -[Setup notebook](./notebooks/00_Setup.ipynb) 'database connection' section. For -additional details, see the -[DataJoint documentation](https://datajoint.com/docs/elements/user-guide/#relational-databases). - -### Config - -#### Via File (Recommended) - -A `dj_local_conf.json` file in your current directory when launching python can -hold all the specifics needed to connect to a database. This can include -different directories for different pipelines. If only the Spyglass `base` is -specified, other subfolder names are assumed from defaults. See -`dj_local_conf_example.json` for the full set of options. 
This example can be -copied and saved as `dj_local_conf.json` to set the configuration for a given -folder. Alternatively, it can be saved as `.datajoint_config.json` in a user's -home directory to be accessed globally. See -[DataJoint docs](https://datajoint.com/docs/core/datajoint-python/0.14/quick-start/#connection) -for more details. - -Note that raw and analysis folder locations should be specified under both -`stores` and `custom` sections of the config file. The `stores` section is used -by DataJoint to store the location of files referenced in database, while the -`custom` section is used by Spyglass. Spyglass will check that these sections -match on startup. - -#### Via Environment Variables - -Older versions of Spyglass relied exclusively on environment for config. If -`spyglass_dirs` is not found in the config file, Spyglass will look for -environment variables. These can be set either once in a terminal session, or -permanently in a unix settings file (e.g., `.bashrc` or `.bash_profile`) in your -home directory. - -```bash -export SPYGLASS_BASE_DIR="/stelmo/nwb" -export SPYGLASS_RECORDING_DIR="$SPYGLASS_BASE_DIR/recording" -export SPYGLASS_SORTING_DIR="$SPYGLASS_BASE_DIR/sorting" -export SPYGLASS_VIDEO_DIR="$SPYGLASS_BASE_DIR/video" -export SPYGLASS_WAVEFORMS_DIR="$SPYGLASS_BASE_DIR/waveforms" -export SPYGLASS_TEMP_DIR="$SPYGLASS_BASE_DIR/tmp" -export DJ_SUPPORT_FILEPATH_MANAGEMENT="TRUE" -``` - -To load variables from a `.bashrc` file, run `source ~/.bashrc` in a terminal. - -#### Temporary directory - -A temporary directory will speed up spike sorting. If unspecified by either -method above, it will be assumed as a `tmp` subfolder relative to the base path. -Be sure it has enough free space (ideally at least 500GB). - -#### Subfolders - -If subfolders do not exist, they will be created automatically. If unspecified -by either method above, they will be assumed as `recording`, `sorting`, `video`, -etc. subfolders relative to the base path. 
- -## File manager - -[`kachery-cloud`](https://github.com/flatironinstitute/kachery-cloud) is a file -manager for Frank Lab collaborators who do not have access to the lab's -production database. - -To customize `kachery` file paths, see `dj_local_conf_example.json` or set the -following variables in your unix settings file (e.g., `.bashrc`). If -unspecified, the defaults below are assumed. - -```bash -export KACHERY_CLOUD_DIR="$SPYGLASS_BASE_DIR/.kachery-cloud" -export KACHERY_TEMP_DIR="$SPYGLASS_BASE_DIR/tmp" -``` - -Be sure to load these with `source ~/.bashrc` to persist changes. - -## Test connection - -Finally, open up a python console (e.g., run `ipython` from terminal) and import -`spyglass` to check that the installation has worked. - -```python -from spyglass.common import Nwbfile - -Nwbfile() -``` diff --git a/docs/src/misc/common_errs.md b/docs/src/misc/common_errs.md deleted file mode 100644 index 34143b0f5..000000000 --- a/docs/src/misc/common_errs.md +++ /dev/null @@ -1,111 +0,0 @@ -# Common Errors - -## Debug Mode - -To enter into debug mode, you can add the following line to your code ... - -```python -__import__("pdb").set_trace() -``` - -This will set a breakpoint in your code at that line. When you run your code, it -will pause at that line and you can explore the variables in the current frame. -Commands in this mode include ... - -- `u` and `d` to move up and down the stack -- `l` to list the code around the current line -- `q` to quit the debugger -- `c` to continue running the code -- `h` for help, which will list all the commands - -`ipython` and jupyter notebooks can launch a debugger automatically at the last -error by running `%debug`. 
- -## Integrity - -```console -IntegrityError: Cannot add or update a child row: a foreign key constraint fails (`schema`.`_table`, CONSTRAINT `_table_ibfk_1` FOREIGN KEY (`parent_field`) REFERENCES `other_schema`.`parent_name` (`parent_field`) ON DELETE RESTRICT ON UPDATE CASCADE) -``` - -`IntegrityError` during `insert` means that some part of the key you're -inserting doesn't exist in the parent of the table you're inserting into. You -can explore which that may be by doing the following... - -```python -my_key = dict(value=key) # whatever you're inserting -MyTable.insert1(my_key) # error here -parents = MyTable.parents(as_objects=True) # get the parents as FreeTables -for parent in parents: # iterate through the parents, with only relevant fields - parent_key = {k: v for k, v in my_key.items() if k in parent.heading.names} - print(parent & parent_key) # restricted parent -``` - -If any of the printed tables are empty, you know you need to insert into that -table (or another ancestor up the pipeline) first. This code will not work if -there are aliases in the table (i.e., `proj` in the definition). In that case, -you'll need to modify your `parent_key` to reflect the renaming. - -The error message itself will tell you which table is the limiting parent. After -`REFERENCES` in the error message, you'll see the parent table and the column -that is causing the error. - -## Permission - -```console -('Insufficient privileges.', "INSERT command denied to user 'username'@'127.0.0.1' for table '_table_name'", 'INSERT INTO `schema_name`.`table_name`(`field1`,`field2`) VALUES (%s,%s)') -``` - -This is a MySQL error that means that either ... - -- You don't have access to the command you're trying to run (e.g., `INSERT`) -- You don't have access to this command on the schema you're trying to run it on - -To see what permissions you have, you can run the following ... 
- -```python -dj.conn().query("SHOW GRANTS FOR CURRENT_USER();").fetchall() -``` - -If you think you should have access to the command, you contact your database -administrator (e.g., Chris in the Frank Lab). Please share the output of the -above command with them. - -## Type - -```console -TypeError: example_function() got an unexpected keyword argument 'this_arg' -``` - -This means that you're calling a function with an argument that it doesn't -expect (e.g., `example_function(this_arg=5)`). You can check the function's -accepted arguments by running `help(example_function)`. - -```console -TypeError: 'NoneType' object is not iterable -``` - -This means that some function is trying to do something with an object of an -unexpected type. For example, if might by running `for item in variable: ...` -when `variable` is `None`. You can check the type of the variable by going into -debug mode and running `type(variable)`. - -## KeyError - -```console -KeyError: 'field_name' -``` - -This means that you're trying to access a key in a dictionary that doesn't -exist. You can check the keys of the dictionary by running `variable.keys()`. If -this is in your custom code, you can get a key and supply a default value if it -doesn't exist by running `variable.get('field_name', default_value)`. - -## DataJoint - -```console -DataJointError("Attempt to delete part table {part} before deleting from its master {master} first.") -``` - -This means that DataJoint's delete process found a part table with a foreign key -reference to the data you're trying to delete. You need to find the master table -listed and delete from that table first. diff --git a/docs/src/misc/index.md b/docs/src/misc/index.md deleted file mode 100644 index 51ef0007d..000000000 --- a/docs/src/misc/index.md +++ /dev/null @@ -1,12 +0,0 @@ -# Misc Docs - -This folder contains miscellaneous supporting files documentation. 
- -- [Common Errors](./common_errs.md) -- [Database Management](./database_management.md) -- [Export](./export.md) -- [Insert Data](./insert_data.md) -- [Merge Tables](./merge_tables.md) -- [Mixin Class](./mixin.md) -- [Session Groups](./session_groups.md) -- [figurl Views](./figurl_views.md) diff --git a/docs/src/misc/insert_data.md b/docs/src/misc/insert_data.md deleted file mode 100644 index 1706c7f73..000000000 --- a/docs/src/misc/insert_data.md +++ /dev/null @@ -1,101 +0,0 @@ -# How to insert data into `spyglass` - -In `spyglass`, every table corresponds to an object. An experimental session is -defined as a collection of such objects. When an NWB file is ingested into -`spyglass`, the information about these objects is first read and inserted into -tables in the `common` module (e.g. `Institution`, `Lab`, `Electrode`, etc). -However, not every NWB file has all the information required by `spyglass`. For -example, many NWB files do not contain any information about the -`DataAcquisitionDevice` or `Probe` because NWB does not yet have an official -standard for specifying them. In addition, one might find that the information -contained in the NWB file is incorrect and would like to modify it before -inserting it into `spyglass` without having to go through the time-consuming -process of re-generating the NWB file. For these cases, we provide an -alternative approach to inserting data to `spyglass`. - -This alternate approach consists of two steps. First, the user must identify -entries that they would like to add to the `spyglass` database that exist -independently of any particular NWB file. For example, information about a -particular probe is stored in the `ProbeType` and `Probe` tables of -`spyglass.common`. The user can either: - -1. 
create these entries programmatically using DataJoint `insert` commands, for - example: - - ```python - sgc.ProbeType.insert1( - { - "probe_type": "128c-4s6mm6cm-15um-26um-sl", - "probe_description": "A Livermore flexible probe with 128 channels, 4 shanks, 6 mm shank length, 6 cm ribbon length. 15 um contact diameter, 26 um center-to-center distance (pitch), single-line configuration.", - "manufacturer": "Lawrence Livermore National Lab", - "num_shanks": 4, - }, - skip_duplicates=True, - ) - ``` - -2. define these entries in a special YAML file called `entries.yaml` that is - processed when `spyglass` is imported. One can think of `entries.yaml` as a - place to define information that the database should come pre-equipped - prior to ingesting any NWB files. The `entries.yaml` file should be placed - in the `spyglass` base directory. An example can be found in - `examples/config_yaml/entries.yaml`. It has the following structure: - - ```yaml - TableName: - - TableEntry1Field1: Value - - TableEntry1Field2: - - TableEntry2Field1: Value - - TableEntry2Field2: Value - ``` - - For example, - - ```yaml - ProbeType: - - probe_type: 128c-4s6mm6cm-15um-26um-sl - probe_description: A Livermore flexible probe with 128 channels, 4 shanks, 6 mm shank - length, 6 cm ribbon length. 15 um contact diameter, 26 um center-to-center distance - (pitch), single-line configuration. - manufacturer: Lawrence Livermore National Lab - num_shanks: 4 - ``` - -Using a YAML file over programmatically creating these entries in a notebook or -script has the advantages that the YAML file maintains a record of what entries -have been added that is easy to access, and the file is portable and can be -shared alongside an NWB file or set of NWB files from a given experiment. - -Next, the user must associate the NWB file with entries defined in the database. 
-This is done by cresqating a _configuration file_, which must: be in the same -directory as the NWB file that it configures be in YAML format have the -following naming convention: `_spyglass_config.yaml`. - -Users can programmatically generate this configuration file. It is then read by -spyglass when calling `insert_session` on the associated NWB file. - -An example of this can be found at -`examples/config_yaml/​​sub-AppleBottom_ses-AppleBottom-DY20-g3_behavior+ecephys_spyglass_config.yaml`. -This file is associated with the NWB file -`sub-AppleBottom_ses-AppleBottom-DY20-g3_behavior+ecephys.nwb`. - -This is the general format for the config entry: - -```yaml -TableName: - - primary_key1: value1 -``` - -For example: - -```yaml -DataAcquisitionDevice: - - data_acquisition_device_name: Neuropixels Recording Device -``` - -In this example, the NWB file that corresponds to this config YAML will become -associated with the DataAcquisitionDevice with primary key -data_acquisition_device_name: Neuropixels Recording Device. This entry must -exist. diff --git a/notebooks/00_Setup.ipynb b/notebooks/00_Setup.ipynb index 9bfeff14b..578c6c179 100644 --- a/notebooks/00_Setup.ipynb +++ b/notebooks/00_Setup.ipynb @@ -47,8 +47,18 @@ "id": "aa6bddcb", "metadata": {}, "source": [ - "JupyterHub users can skip this step. Frank Lab members should first follow\n", - "'rec to nwb overview' steps on Google Drive to set up an ssh connection.\n", + "Skip this step if you're ...\n", + "\n", + "1. Running the tutorials on [JupyterHub](https://spyglass.hhmi.2i2c.cloud/) \n", + "2. A member of the Frank Lab. Instead, ssh to a shared machine.\n" + ] + }, + { + "cell_type": "markdown", + "id": "520ea38f", + "metadata": {}, + "source": [ + "### Tools\n", + "\n", "For local use, download and install ...\n", "\n", @@ -68,15 +78,147 @@ "4. 
[git](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git) for\n", " downloading the repository, including notebooks.\n", "\n", + "See [this DataJoint guide](https://datajoint.com/docs/elements/user-guide/) for \n", + "additional details on each of these programs and the role they play in using the \n", + "pipeline.\n", + "\n", + "
Suggested VSCode settings\n", + "\n", + "Within the Spyglass repository, there is a `.vscode` folder with `json` files\n", + "that specify limited settings and extensions intended for developers. The average\n", + "user may benefit from the following fuller sets.\n", + "\n", + "We recommend adding these incrementally so you get a feel for what each one does\n", + "before adding the next, and to avoid being overwhelmed by changes.\n", + "\n", + "1. `extensions.json`. By updating this file, you'll add to the 'Recommended'\n", + "section of the extensions tab. Each extension page will provide more information \n", + "on the uses and benefits. Some relevant concepts include...\n", + " - Linting: Warning of potential problems\n", + " - Formatting: Auto-adjusting optional coding styles to align across users\n", + " - Debugger: Progressive running of code. Please search for tutorials\n", + " - Autocompletion: Prompting for potential options when coding\n", + "\n", + "```json\n", + "{\n", + " \"recommendations\": [\n", + " // Python Extensions\n", + " \"charliermarsh.ruff\", // Fast linter\n", + " \"donjayamanne.python-environment-manager\", // Environment manager\n", + " \"kevinrose.vsc-python-indent\", // Auto-indent when coding\n", + " \"ms-python.black-formatter\", // Opinionated formatting\n", + " \"ms-python.debugpy\", // Debugger\n", + " \"ms-python.isort\", // Opinionated formatter for imports\n", + " \"ms-python.pylint\", // Linter to support a DataJoint-specific linter\n", + " \"ms-python.python\", // Language support for Python\n", + " \"ms-python.vscode-pylance\", // Additional language support\n", + " // Jupyter\n", + " \"ms-toolsai.jupyter\", // Run notebooks in VSCode\n", + " \"ms-toolsai.jupyter-keymap\", // Allow key-bindings\n", + " \"ms-toolsai.jupyter-renderers\", // Display images\n", + " // Autocompletion/Markdown\n", + " \"github.copilot\", // Auto-suggest with copilot LLM\n", + " \"github.copilot-chat\", // Add chat-box for questions to LLM\n", + " 
\"visualstudioexptteam.intellicode-api-usage-examples\", // Prompt package options\n", + " \"visualstudioexptteam.vscodeintellicode\", // Prompt Python-general options\n", + " \"davidanson.vscode-markdownlint\", // Linter for markdown\n", + " \"streetsidesoftware.code-spell-checker\", // Spell checker\n", + " // SSH - Work on remote servers - Required for Frank Lab members\n", + " \"ms-vscode-remote.remote-ssh\",\n", + " \"ms-vscode-remote.remote-ssh-edit\",\n", + " \"ms-vscode.remote-explorer\",\n", + " ],\n", + " \"unwantedRecommendations\": []\n", + "}\n", + "```\n", + "\n", + "2. `settings.json`. These can be places just in Spyglass, or added to your user\n", + "settings file. Search settings in the command panel (cmd/ctrl+shift+P) to open\n", + "this file directly.\n", + "\n", + "```json\n", + "{\n", + " // GENERAL\n", + " \"editor.insertSpaces\": true, // tab -> spaces\n", + " \"editor.rulers\": [ 80 ], // vertical line at 80\n", + " \"editor.stickyScroll.enabled\": true, // Show scope at top\n", + " \"files.associations\": { \"*.json\": \"jsonc\" }, // Load JSON with comments\n", + " \"files.autoSave\": \"onFocusChange\", // Save on focus change\n", + " \"files.exclude\": { // Hide these in the file viewer\n", + " \"**/__pycache*\": true, // Add others with wildcards\n", + " \"**/.ipynb_ch*\": true, \n", + " },\n", + " \"files.trimTrailingWhitespace\": true, // Remove extra spaces in lines\n", + " \"git.enabled\": true, // use git \n", + " \"workbench.editorAssociations\": { // open file extension as given type\n", + " \"*.ipynb\": \"jupyter-notebook\", \n", + " },\n", + " // PYTHON\n", + " \"editor.defaultFormatter\": \"ms-python.black-formatter\", // use black\n", + " \"[python]\": {\n", + " \"editor.formatOnSave\": true,\n", + " \"editor.defaultFormatter\": \"ms-python.black-formatter\",\n", + " \"editor.codeActionsOnSave\": { \"source.organizeImports\": \"always\"},\n", + " },\n", + " \"python.analysis.autoImportCompletions\": false, // Disable 
auto-import\n", + " \"python.languageServer\": \"Pylance\", // Use Pylance\n", + " \"pylint.args\": [ // DataJoint linter optional\n", + " // \"--load-plugins=datajoint_linter\", // Requires pip installing\n", + " // \"--permit-dj-filepath=y\", // Specific to datajoint_linter\n", + " \"--disable=E0401,E0102,W0621,W0401,W0611,W0614\"\n", + " ],\n", + " // NOTEBOOKS\n", + " \"jupyter.askForKernelRestart\": false, // Prevent dialog box on restart\n", + " \"jupyter.widgetScriptSources\": [\"jsdelivr.com\", \"unpkg.com\"], // IPyWidgets\n", + " \"notebook.output.textLineLimit\": 15, // Limit output\n", + " \"notebook.lineNumbers\": \"on\", // Number lines in cells\n", + " \"notebook.formatOnSave.enabled\": true, // blackify cells\n", + " // AUTOCOMPLETION\n", + " \"editor.tabCompletion\": \"on\", // tab over suggestions\n", + " \"github.copilot.editor.enableAutoCompletions\": true, // Copilot\n", + " \"cSpell.enabled\": true, // Spellcheck\n", + " \"cSpell.language\": \"en,en-US,companies,python,python-common\",\n", + " \"cSpell.maxDuplicateProblems\": 2, // Only mention a problem twice\n", + " \"cSpell.spellCheckDelayMs\": 500, // Wait 0.5s after save\n", + " \"cSpell.userWords\": [ \"datajoint\", \"longblob\", ], // Add words\n", + " \"cSpell.enableFiletypes\": [ \n", + " \"!json\", \"markdown\", \"yaml\", \"python\" // disable (!) 
json, check others\n", + " ],\n", + " \"cSpell.logLevel\": \"Warning\", // Only show warnings, can turn off\n", + " // MARKDOWN\n", + " \"[markdown]\": { // Use linter and format on save\n", + " \"editor.defaultFormatter\": \"DavidAnson.vscode-markdownlint\",\n", + " \"editor.formatOnSave\": true,\n", + " },\n", + " \"editor.codeActionsOnSave\": { \"source.fixAll.markdownlint\": \"explicit\" },\n", + " \"rewrap.reformat\": true, // allows context-aware rewrapping\n", + " \"rewrap.wrappingColumn\": 80, // Align with Black formatter\n", + "}\n", + "```\n", + "\n", + "The DataJoint linter is available at \n", + "[this repository](https://github.com/CBroz1/datajoint_linter).\n", + "\n", + "
" + ] + }, + { + "cell_type": "markdown", + "id": "df7554fc", + "metadata": {}, + "source": [ + "\n", + "### Installation\n", + "\n", "In a terminal, ...\n", "\n", - "1. navigate to your project directory.\n", - "2. use `git` to download the Spyglass repository.\n", - "3. navigate to the newly downloaded directory.\n", - "4. create a `mamba` environment with either the standard `environment.yml` or\n", + "1. Navigate to your project directory.\n", + "2. Use `git` to download the Spyglass repository.\n", + "3. Navigate to the newly downloaded directory.\n", + "4. Create a `mamba` environment with either the standard `environment.yml` or\n", " the `environment_position.yml`, if you intend to use the full position\n", " pipeline. The latter will take longer to install.\n", - "5. open this notebook with VSCode\n", + "5. Open this notebook with VSCode\n", "\n", "Commands for the steps above ...\n", "\n", @@ -88,19 +230,61 @@ "code notebooks/00_Setup.ipynb # 5\n", "```\n", "\n", - "_Note:_ Spyglass is also installable via\n", - "[pip]()\n", - "and [pypi](https://pypi.org/project/spyglass-neuro/) with\n", - "`pip install spyglass-neuro`, but downloading from GitHub will also download\n", - "other files.\n", - "\n", "Next, within VSCode,\n", "[select the kernel](https://code.visualstudio.com/docs/datascience/jupyter-kernel-management)\n", "that matches your spyglass environment created with `mamba`. To use other Python\n", "interfaces, be sure to activate the environment: `conda activate spyglass`\n", "\n", - "See [this guide](https://datajoint.com/docs/elements/user-guide/) for additional\n", - "details on each of these programs and the role they play in using the pipeline.\n" + "\n", + "### Considerations\n", + "\n", + "1. Spyglass is also installable via\n", + "[pip]()\n", + "and [pypi](https://pypi.org/project/spyglass-neuro/) with\n", + "`pip install spyglass-neuro`, but downloading from GitHub will also download\n", + "other files, like this tutorial.\n", + "2. 
Developers who wish to work on the code base may want to do an editable\n", + "install from within their conda environment: `pip install -e /path/to/spyglass/`\n" + ] + }, + { + "cell_type": "markdown", + "id": "aa254c25", + "metadata": {}, + "source": [ + "### Optional Dependencies\n", + "\n", + "Some pipelines require installation of additional packages.\n", + "\n", + "#### Spike Sorting\n", + "\n", + "The spike sorting pipeline relies on `spikeinterface` and optionally\n", + "`mountainsort4`.\n", + "\n", + "```bash\n", + "conda activate \n", + "pip install spikeinterface[full,widgets]\n", + "pip install mountainsort4\n", + "```\n", + "\n", + "#### LFP\n", + "\n", + "The LFP pipeline uses `ghostipy`.\n", + "\n", + "__WARNING:__ If you are on an M1 Mac, you need to install `pyfftw` via `conda`\n", + "BEFORE installing `ghostipy`:\n", + "\n", + "```bash\n", + "conda install -c conda-forge pyfftw # for M1 Macs\n", + "pip install ghostipy\n", + "```\n", + "\n", + "#### Decoding\n", + "\n", + "The Decoding pipeline relies on `jax` to process data with GPUs. Please see\n", + "their conda installation steps\n", + "[here](https://jax.readthedocs.io/en/latest/installation.html#conda-installation).\n", + "\n" ] }, { @@ -120,7 +304,7 @@ "\n", "1. Connect to an existing database.\n", "2. Run your own database with [Docker](#running-your-own-database)\n", - "3. JupyterHub (coming soon...)\n", + "3. JupyterHub (database pre-configured, skip this step)\n", "\n", "Your choice above should result in a set of credentials, including host name,\n", "host port, user name, and password. 
Note these for the next step.\n", @@ -155,7 +339,9 @@ "Connecting to an existing database will require a user name and password.\n", "Please contact your database administrator for this information.\n", "\n", - "Frank Lab members should contact Chris.\n" + "For persistent databases with backups, administrators should review our \n", + "documentation on \n", + "[database management](https://lorenfranklab.github.io/spyglass/latest/ForDevelopers/Database).\n" ] }, { @@ -202,10 +388,10 @@ "\n", "Docker credentials are as follows:\n", "\n", - "- Host: localhost\n", - "- Password: tutorial\n", - "- User: root\n", - "- Port: 3306\n" + "- Host: `localhost`\n", + "- User: `root`\n", + "- Password: `tutorial`\n", + "- Port: `3306`\n" ] }, { @@ -213,7 +399,7 @@ "id": "706d0ed5", "metadata": {}, "source": [ - "### Config and Connecting to the database\n" + "### Config\n" ] }, { @@ -221,20 +407,27 @@ "id": "22d3b72d", "metadata": {}, "source": [ - "Spyglass can load settings from either a DataJoint config file (recommended) or\n", - "environmental variables. The code below will generate a config file, but we\n", - "first need to decide a 'base path'. This is generally the parent directory\n", - "where the data will be stored, with subdirectories for `raw`, `analysis`, and\n", - "other data folders. If they don't exist already, they will be created.\n", + "Spyglass will load settings from the 'custom' section of your DataJoint config file.\n", + "The code below will generate a config\n", + "file, but we first need to decide a 'base path'. This is generally the parent\n", + "directory where the data will be stored, with subdirectories for `raw`,\n", + "`analysis`, and other data folders. If they don't exist already, they will be\n", + "created relative to the base path specified with their default names. \n", + "\n", + "A temporary directory is one such subfolder (default `base-dir/tmp`) to speed\n", + "up spike sorting. 
Ideally, this folder should have ~500GB free.\n", "\n", "The function below will create a config file (`~/.datajoint.config` if global,\n", - "`./dj_local_conf.json` if local). Local is recommended for the notebooks, as\n", + "`./dj_local_conf.json` if local).\n", + "See also [DataJoint docs](https://datajoint.com/docs/core/datajoint-python/0.14/quick-start/#connection).\n", + "Local is recommended for the notebooks, as\n", "each will start by loading this file. Custom json configs can be saved elsewhere, but will need to be loaded in startup with\n", "`dj.config.load('your-path')`.\n", "\n", - "To point spyglass to a folder elsewhere (e.g., an external drive for waveform\n", - "data), simply edit the json file. Note that the `raw` and `analysis` paths\n", - "appear under both `stores` and `custom`.\n" + "To point Spyglass to a folder elsewhere (e.g., an external drive for waveform\n", + "data), simply edit the resulting json file. Note that the `raw` and `analysis` paths\n", + "appear under both `stores` and `custom`. Spyglass will check that these match\n", + "on startup and log a warning if not.\n" ] }, { @@ -256,12 +449,67 @@ " base_dir=\"/path/like/stelmo/nwb/\",\n", " database_user=\"your username\",\n", " database_password=\"your password\", # remove this line for shared machines\n", - " database_host=\"localhost or lmf-db.cin.ucsf.edu\",\n", + " database_host=\"localhost or lmf-db.cin.ucsf.edu\", # only list one\n", " database_port=3306,\n", " set_password=False,\n", ")" ] }, + { + "cell_type": "markdown", + "id": "a1c20b5b", + "metadata": {}, + "source": [ + "
Legacy config\n", + "\n", + "Older versions of Spyglass relied exclusively on environment variables for\n", + "config. If `spyglass_dirs` is not found in the config file, Spyglass will look\n", + "for environment variables. These can be set either once in a terminal session,\n", + "or permanently in a unix settings file (e.g., `.bashrc` or `.bash_profile`) in\n", + "your home directory.\n", + "\n", + "```bash\n", + "export SPYGLASS_BASE_DIR=\"/stelmo/nwb\"\n", + "export SPYGLASS_RECORDING_DIR=\"$SPYGLASS_BASE_DIR/recording\"\n", + "export SPYGLASS_SORTING_DIR=\"$SPYGLASS_BASE_DIR/sorting\"\n", + "export SPYGLASS_VIDEO_DIR=\"$SPYGLASS_BASE_DIR/video\"\n", + "export SPYGLASS_WAVEFORMS_DIR=\"$SPYGLASS_BASE_DIR/waveforms\"\n", + "export SPYGLASS_TEMP_DIR=\"$SPYGLASS_BASE_DIR/tmp\"\n", + "export KACHERY_CLOUD_DIR=\"$SPYGLASS_BASE_DIR/.kachery-cloud\"\n", + "export KACHERY_TEMP_DIR=\"$SPYGLASS_BASE_DIR/tmp\"\n", + "export DJ_SUPPORT_FILEPATH_MANAGEMENT=\"TRUE\"\n", + "```\n", + "\n", + "To load variables from a `.bashrc` file, run `source ~/.bashrc` in a terminal.\n", + "\n", + "
" + ] + }, + { + "cell_type": "markdown", + "id": "f2ef434f", + "metadata": {}, + "source": [ + "### Managing Files\n", + "\n", + "[`kachery-cloud`](https://github.com/flatironinstitute/kachery-cloud) is a file\n", + "manager for collaborators to share files. This is an optional dependency for\n", + "collaborating teams who don't have direct access to one another's disk space, \n", + "but want to share a MySQL database instance.\n", + "To customize `kachery` file paths, see `dj_local_conf_example.json`. \n", + "\n", + "To set up a new `kachery` instance for your project, contact maintainers\n", + "of this package." + ] + }, + { + "cell_type": "markdown", + "id": "38679c3a", + "metadata": {}, + "source": [ + "### Connecting" + ] + }, { "cell_type": "markdown", "id": "06eef771", @@ -294,16 +542,17 @@ "metadata": {}, "source": [ "If you see an error saying `Could not find SPYGLASS_BASE_DIR`, try loading your\n", - "config before importing Spyglass, try setting this as an environmental variable\n", - "before importing Spyglass.\n", + "config before importing Spyglass. 
\n", "\n", "```python\n", - "os.environ['SPYGLASS_BASE_DIR'] = '/your/base/path'\n", - "\n", - "import spyglass\n", - "from spyglass.settings import SpyglassConfig\n", "import datajoint as dj\n", - "print(SpyglassConfig().config)\n", + "dj.config.load('/your/config/path')\n", + "\n", + "from spyglass.common import Session\n", + "\n", + "Session()\n", + "\n", + "# If successful...\n", "dj.config.save_local() # or global\n", "```\n" ] @@ -321,7 +570,7 @@ "id": "c6850095", "metadata": {}, "source": [ - "Next, we'll try [inserting data](./01_Insert_Data.ipynb)\n" + "Next, we'll [introduce some concepts](./01_Concepts.ipynb)\n" ] } ], @@ -341,7 +590,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.16" + "version": "3.9.19" } }, "nbformat": 4, diff --git a/notebooks/01_Concepts.ipynb b/notebooks/01_Concepts.ipynb new file mode 100644 index 000000000..2c3d535d1 --- /dev/null +++ b/notebooks/01_Concepts.ipynb @@ -0,0 +1,218 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "# Concepts\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Intro\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "_Developer Note:_ if you may make a PR in the future, be sure to copy this\n", + "notebook, and use the `gitignore` prefix `temp` to avoid future conflicts.\n", + "\n", + "This is one notebook in a multi-part series on Spyglass. To set up your Spyglass environment and database, see\n", + "[the Setup notebook](./00_Setup.ipynb)\n", + "\n", + "This notebook will introduce foundational concepts that will help in\n", + "understanding how to work with Spyglass pipelines.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Other materials\n", + "\n", + "DataJoint is an \"Object-relational mapping\" tool, which means that it gives us\n", + "a Python object for tables that exist on a shared SQL server. 
Many Spyglass\n", + "imports are DataJoint tables like this.\n", + "\n", + "Any 'introduction to SQL' will give an overview of relational data models as\n", + "a primer on how DataJoint tables within Spyglass will interact with one-another,\n", + "and the ways we can interact with them. A quick primer may help with the\n", + "specifics ahead.\n", + "\n", + "For an overview of DataJoint, including table definitions and inserts, see\n", + "[DataJoint tutorials](https://github.com/datajoint/datajoint-tutorials)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Common Errors\n", + "\n", + "Skip this for now, but refer back if you hit issues.\n", + "\n", + "\n", + "### Integrity\n", + "\n", + "```console\n", + "IntegrityError: Cannot add or update a child row: a foreign key constraint fails (`schema`.`_table`, CONSTRAINT `_table_ibfk_1` FOREIGN KEY (`parent_field`) REFERENCES `other_schema`.`parent_name` (`parent_field`) ON DELETE RESTRICT ON UPDATE CASCADE)\n", + "```\n", + "\n", + "`IntegrityError` during `insert` means that some part of the key you're\n", + "inserting doesn't exist in the parent of the table you're inserting into. You\n", + "can explore which that may be by doing the following...\n", + "\n", + "```python\n", + "my_key = dict(value=key) # whatever you're inserting\n", + "MyTable.insert1(my_key) # error here\n", + "parents = MyTable.parents(as_objects=True) # get the parents as FreeTables\n", + "for parent in parents: # iterate through the parents, with only relevant fields\n", + " parent_key = {k: v for k, v in my_key.items() if k in parent.heading.names}\n", + " print(parent & parent_key) # restricted parent\n", + "```\n", + "\n", + "If any of the printed tables are empty, you know you need to insert into that\n", + "table (or another ancestor up the pipeline) first. This code will not work if\n", + "there are aliases in the table (i.e., `proj` in the definition). 
In that case,\n", + "you'll need to modify your `parent_key` to reflect the renaming.\n", + "\n", + "The error message itself will tell you which table is the limiting parent. After\n", + "`REFERENCES` in the error message, you'll see the parent table and the column\n", + "that is causing the error.\n", + "\n", + "### Permission\n", + "\n", + "```console\n", + "('Insufficient privileges.', \"INSERT command denied to user 'username'@'127.0.0.1' for table '_table_name'\", 'INSERT INTO `schema_name`.`table_name`(`field1`,`field2`) VALUES (%s,%s)')\n", + "```\n", + "\n", + "This is a MySQL error that means that either ...\n", + "\n", + "- You don't have access to the command you're trying to run (e.g., `INSERT`)\n", + "- You don't have access to this command on the schema you're trying to run it on\n", + "\n", + "To see what permissions you have, you can run the following ...\n", + "\n", + "```python\n", + "dj.conn().query(\"SHOW GRANTS FOR CURRENT_USER();\").fetchall()\n", + "```\n", + "\n", + "If you think you should have access to the command, contact your database\n", + "administrator (e.g., Chris in the Frank Lab). Please share the output of the\n", + "above command with them.\n", + "\n", + "### Type\n", + "\n", + "```console\n", + "TypeError: example_function() got an unexpected keyword argument 'this_arg'\n", + "```\n", + "\n", + "This means that you're calling a function with an argument that it doesn't\n", + "expect (e.g., `example_function(this_arg=5)`). You can check the function's\n", + "accepted arguments by running `help(example_function)`.\n", + "\n", + "```console\n", + "TypeError: 'NoneType' object is not iterable\n", + "```\n", + "\n", + "This means that some function is trying to do something with an object of an\n", + "unexpected type. For example, it might be running `for item in variable: ...`\n", + "when `variable` is `None`. 
You can check the type of the variable by going into\n", + "debug mode and running `type(variable)`.\n", + "\n", + "### KeyError\n", + "\n", + "```console\n", + "KeyError: 'field_name'\n", + "```\n", + "\n", + "This means that you're trying to access a key in a dictionary that doesn't\n", + "exist. You can check the keys of the dictionary by running `variable.keys()`. If\n", + "this is in your custom code, you can get a key and supply a default value if it\n", + "doesn't exist by running `variable.get('field_name', default_value)`.\n", + "\n", + "### DataJoint\n", + "\n", + "```console\n", + "DataJointError(\"Attempt to delete part table {part} before deleting from its master {master} first.\")\n", + "```\n", + "\n", + "This means that DataJoint's delete process found a part table with a foreign key\n", + "reference to the data you're trying to delete. You need to find the master table\n", + "listed and delete from that table first.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Debug Mode\n", + "\n", + "To fix an error, you may want to enter 'debug mode'. VSCode has a dedicated\n", + "featureful [extension](https://code.visualstudio.com/docs/python/debugging)\n", + "for making use of the UI, but you can choose to use Python's built-in tool.\n", + "\n", + "To enter into debug mode, you can add the following line to your code ...\n", + "\n", + "```python\n", + "__import__(\"pdb\").set_trace()\n", + "```\n", + "\n", + "This will set a breakpoint in your code at that line. 
When you run your code, it\n", + "will pause at that line and you can explore the variables in the current frame.\n", + "Commands in this mode include ...\n", + "\n", + "- `u` and `d` to move up and down the stack\n", + "- `l` to list the code around the current line\n", + "- `q` to quit the debugger\n", + "- `c` to continue running the code\n", + "- `h` for help, which will list all the commands\n", + "\n", + "`ipython` and jupyter notebooks can launch a debugger automatically at the last\n", + "error by running `%debug`.\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Up Next\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, we'll try [inserting data](./02_Insert_Data.ipynb)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "spy", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/01_Insert_Data.ipynb b/notebooks/02_Insert_Data.ipynb similarity index 94% rename from notebooks/01_Insert_Data.ipynb rename to notebooks/02_Insert_Data.ipynb index a92e14ca0..d8ef86233 100644 --- a/notebooks/01_Insert_Data.ipynb +++ b/notebooks/02_Insert_Data.ipynb @@ -1062,6 +1062,48 @@ "sgc.LabTeam.LabTeamMember()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In general, we can insert into any table in this way, by supplying \n", + "a dictionary (or list of dictionaries) with all the fields mentioned in \n", + "`Table.heading.names` so long as the data types match what is described in\n", + "`Table.heading`\n", + "\n", + "```python\n", + "Table.insert1({'a': 1, 'b': 'other'}) # only one entry\n", + "Table.insert([{'a':1, 'b': 'other'}, {'a':1, 'b': 
'next'}]) # multiple\n", + "```\n", + "\n", + "For example ..." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sgc.ProbeType.insert1(\n", + " {\n", + " \"probe_type\": \"128c-4s6mm6cm-15um-26um-sl\",\n", + " \"probe_description\": \"A Livermore flexible probe with 128 channels ...\",\n", + " \"manufacturer\": \"Lawrence Livermore National Lab\",\n", + " \"num_shanks\": 4,\n", + " },\n", + " skip_duplicates=True,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `skip_duplicates` flag tells DataJoint not to raise an error if the data\n", + "is already in the table. This should only be used in special cases." + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -2734,6 +2776,196 @@ "!ls $SPYGLASS_BASE_DIR/raw" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## YAML Inserts\n", + "\n", + "The following step is an optional feature, and not required for the remaining\n", + "notebooks.\n", + "\n", + "Not every NWB file has all the information required by Spyglass. For example,\n", + "many NWB files do not contain any information about the `DataAcquisitionDevice`\n", + "or `Probe` because NWB does not yet have an official standard for specifying\n", + "them. Or, information in the NWB file may need correcting. For example,\n", + "the NWB file specifies the lab name as the \"Loren Frank Lab\", but your lab table expects \"Frank Lab\".\n", + "\n", + "Manual inserts can either be done on tables directly (e.g., \n", + "`Table.insert1(my_dict)`), or done in batch with `yaml` files. This is done in\n", + "two steps: \n", + "\n", + "1. Generate data to be entered.\n", + "2. 
Associate data with one or more NWB files.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Batch Insert\n", + "\n", + "First, Spyglass will check for an `entries.yaml` file at the base directory\n", + "(see [Setup](./00_Setup.ipynb)) and run all corresponding inserts. \n", + "This is a great place to define entries that the database should auto-insert\n", + "prior to ingesting any NWB files. An example can be found in\n", + "`examples/config_yaml/entries.yaml`. It has the following structure:\n", + "\n", + "```yaml\n", + "TableName:\n", + "    # one list item per entry; each item maps field names to values\n", + "    - TableEntry1Field1: Value\n", + "      TableEntry1Field2: Value\n", + "\n", + "    - TableEntry2Field1: Value\n", + "      TableEntry2Field2: Value\n", + "```\n", + "\n", + "For example,\n", + "\n", + "```yaml\n", + "ProbeType:\n", + " - probe_type: 128c-4s6mm6cm-15um-26um-sl\n", + " probe_description: A Livermore flexible probe with 128 channels, 4 shanks, \n", + " 6 mm shank length, 6 cm ribbon length. 15 um contact diameter, 26 um \n", + " center-to-center distance (pitch), single-line configuration.\n", + " manufacturer: Lawrence Livermore National Lab\n", + " num_shanks: 4\n", + "```\n", + "\n", + "Using a YAML file over data stored in Python scripts helps maintain records\n", + "of data entries in a human-readable file. For ways to share a state of the\n", + "database, see our [export tutorial](./05_Export.ipynb).\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Pairing with NWBs\n", + "\n", + "Next, we'll create a _configuration file_ to override values in a given NWB\n", + "(e.g., \"Loren Frank Lab\" -> \"Frank Lab\"). This must be done in the same\n", + "directory as the NWB file that it configures and have the following naming\n", + "convention: `<name_of_nwb_file>_spyglass_config.yaml`.
This file is then read by\n", + "Spyglass when calling `insert_sessions` on the associated NWB file.\n", + "\n", + "An example of this can be found at\n", + "`examples/config_yaml/sub-AppleBottom_ses-AppleBottom-DY20-g3_behavior+ecephys_spyglass_config.yaml`.\n", + "\n", + "This file is associated with the NWB file\n", + "`sub-AppleBottom_ses-AppleBottom-DY20-g3_behavior+ecephys.nwb`.\n", + "\n", + "This is the general format for the config entry:\n", + "\n", + "```yaml\n", + "TableName:\n", + " - primary_key1: value1\n", + "```\n", + "\n", + "For example:\n", + "\n", + "```yaml\n", + "Lab:\n", + " - lab_name: Frank Lab\n", + "DataAcquisitionDevice:\n", + " - data_acquisition_device_name: Neuropixels Recording Device\n", + "```\n", + "\n", + "In this example, the NWB file that corresponds to this config YAML will become\n", + "associated with the Lab primary key 'Frank Lab' and the DataAcquisitionDevice\n", + "with primary key 'Neuropixels Recording Device'. These entries must already exist." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Example Ingestion with Real Data\n", + "\n", + "For this example, you will need to download the 5 GB NWB file \n", + "`sub-JDS-NFN-AM2_behavior+ecephys.nwb`\n", + "from dandiset 000447 here: \n", + "https://dandiarchive.org/dandiset/000447/0.230316.2133/files?location=sub-JDS-NFN-AM2&page=1\n", + "\n", + "Click the download arrow button to download the file to your computer. Add it to\n", + "the folder containing your raw NWB data to be ingested into Spyglass.\n", + "\n", + "This file does not specify a data acquisition device. Let's say that the\n", + "data was collected from a SpikeGadgets system with an Intan amplifier. This\n", + "matches an existing entry in the `DataAcquisitionDevice` table with name\n", + "\"data_acq_device0\".
We will create a configuration YAML file to associate\n", + "this entry with the NWB file.\n", + "\n", + "If you are connected to the Frank lab database, please rename any downloaded\n", + "files (e.g., `example20200101_yourname.nwb`) to avoid naming collisions, as the\n", + "file name acts as the primary key across key tables." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "nwb_file_name = \"sub-JDS-NFN-AM2_behavior+ecephys_rly.nwb\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# this configuration yaml file should be placed next to the downloaded NWB file\n", + "yaml_config_path = \"sub-JDS-NFN-AM2_behavior+ecephys_rly_spyglass_config.yaml\"\n", + "with open(yaml_config_path, \"w\") as config_file:\n", + " lines = [\n", + " \"DataAcquisitionDevice:\",\n", + " \"- data_acquisition_device_name: data_acq_device0\",\n", + " ]\n", + " config_file.writelines(line + \"\\n\" for line in lines)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Then call `insert_sessions` as usual."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import spyglass.data_import as sgi\n", + "\n", + "sgi.insert_sessions(nwb_file_name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Confirm the session was inserted with the correct `DataAcquisitionDevice`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import spyglass.common as sgc\n", + "from spyglass.utils.nwb_helper_fn import get_nwb_copy_filename\n", + "\n", + "nwb_copy_file_name = get_nwb_copy_filename(nwb_file_name)\n", + "\n", + "sgc.Session.DataAcquisitionDevice & {\"nwb_file_name\": nwb_copy_file_name}" + ] + }, { "cell_type": "markdown", "metadata": {}, diff --git a/notebooks/02_Data_Sync.ipynb b/notebooks/03_Data_Sync.ipynb similarity index 100% rename from notebooks/02_Data_Sync.ipynb rename to notebooks/03_Data_Sync.ipynb diff --git a/notebooks/03_Merge_Tables.ipynb b/notebooks/04_Merge_Tables.ipynb similarity index 100% rename from notebooks/03_Merge_Tables.ipynb rename to notebooks/04_Merge_Tables.ipynb diff --git a/notebooks/04_PopulateConfigFile.ipynb b/notebooks/04_PopulateConfigFile.ipynb deleted file mode 100644 index 4ead237fb..000000000 --- a/notebooks/04_PopulateConfigFile.ipynb +++ /dev/null @@ -1,273 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "68303e8a", - "metadata": {}, - "source": [ - "# Customizing Data Insertion into Spyglass\n", - "\n", - "If you would like to insert data into Spyglass that does not\n", - "follow the naming or organizational format expected by Spyglass, \n", - "or you would like to override what values are ingested into Spyglass from \n", - "your NWB files, including missing values, follow this guide.\n", - "\n", - "## General Approach\n", - "\n", - "When an NWB file is ingested into Spyglass, metadata about the session\n", - "is first read from the NWB file and inserted into\n", - "tables 
in the `common` module (e.g. `Institution`, `Lab`, `Electrode`, etc).\n", - "However, not every NWB file has all the information required by Spyglass or\n", - "the information in the NWB file is not in a format that Spyglass expects. For\n", - "example, many NWB files do not contain information about the\n", - "`DataAcquisitionDevice` or `Probe` because the NWB data standard does not yet\n", - "have an official\n", - "standard for specifying them. For these cases, we provide a way to customize\n", - "how data is ingested into Spyglass.\n", - "\n", - "Let's say that you want to ingest an NWB file into Spyglass where the lab name\n", - "in the file is written as \"Loren Frank Lab\" or it is not specified, but you \n", - "know the data comes from the Loren Frank Lab. Let's say that in Spyglass,\n", - "the lab name that is associated with sessions from the Loren Frank Lab is \n", - "\"Frank Lab\" and you would like to use the same name in order to facilitate\n", - "data search in Spyglass. To change the lab name when you insert your new data \n", - "to Spyglass, you could either 1) edit the NWB file directly and then \n", - "insert it into Spyglass, or 2) define an override value \"Frank Lab\" to be \n", - "used instead of the value specified in the NWB file (or lack thereof).\n", - "\n", - "Note that if this is your own data and you want to make changes to\n", - "information about how the data is interpreted, e.g., the units of measurement\n", - "are incorrect, we recommend that you edit the NWB file directly because the \n", - "file or derivatives of it might eventually be shared outside of Spyglass and \n", - "they will not reflect any modifications that you have made to \n", - "the data only in Spyglass." 
- ] - }, - { - "cell_type": "markdown", - "id": "bcc87f67", - "metadata": {}, - "source": [ - "## Define a Configuration YAML File\n", - "\n", - "To override values in the NWB file during insertion into Spyglass, \n", - "you will need to create a configuration \n", - "YAML file that lives in the same directory as your NWB file, named: \n", - "`_spyglass_config.yaml`\n", - "\n", - "An example configuration YAML file can be found at\n", - "`examples/config_yaml/​​sub-AppleBottom_ses-AppleBottom-DY20-g3_behavior+ecephys_spyglass_config.yaml`.\n", - "This file is associated with the NWB file\n", - "`sub-AppleBottom_ses-AppleBottom-DY20-g3_behavior+ecephys.nwb`.\n", - "\n", - "This is the general format for entries in this configuration file:\n", - "\n", - "```yaml\n", - "TableName:\n", - "- primary_key1: value1\n", - "```\n", - "\n", - "For example:\n", - "\n", - "```yaml\n", - "Lab:\n", - "- lab_name: Frank Lab\n", - "```\n", - "\n", - "In this example, the NWB file that corresponds to this config YAML will become\n", - "associated with the entry in the `Lab` table with the value `Frank Lab` for \n", - "the primary key `lab_name`. This entry must already exist. More specifically,\n", - "when the NWB file is ingested into Spyglass, \n", - "a new `Session` entry will be created for the NWB file that has a foreign key to\n", - "the `Lab` entry with `lab_name` = \"Frank Lab\", ignoring whatever lab value is \n", - "in the NWB file, even if one does not exist.\n", - "\n", - "TODO implement this for `Lab`.\n" - ] - }, - { - "cell_type": "markdown", - "id": "fc6d0986", - "metadata": {}, - "source": [ - "## Create Entries to Reference in the Configuration YAML\n", - "\n", - "As mentioned earlier, the table entry that you want to associate with your NWB\n", - "file must already exist in the database. This entry would typically be a value\n", - "that is independent of any particular NWB file, such as\n", - "`DataAcquisitionDevice`, `Lab`, `Probe`, and `BrainRegion`. 
\n", - "\n", - "If the entry does not already exist, you can either:\n", - "1) create the entry programmatically using DataJoint `insert` commands, or\n", - "2) define the entry in a YAML file called `entries.yaml` that is automatically\n", - " processed when Spyglass is imported. You can think of `entries.yaml` as a\n", - " place to define information that the database should come pre-equipped prior\n", - " to ingesting your NWB files. The `entries.yaml` file should be placed in the\n", - " `spyglass` base directory (next to `README.md`). An example can be found in\n", - " `examples/config_yaml/entries.yaml`. This file should have the following\n", - " structure:\n", - "\n", - " ```yaml\n", - " TableName:\n", - " - TableEntry1Field1: Value\n", - " TableEntry1Field2: Value\n", - " - TableEntry2Field1: Value\n", - " TableEntry2Field2: Value\n", - " ```\n", - "\n", - " For example,\n", - "\n", - " ```yaml\n", - " DataAcquisitionDeviceSystem:\n", - " data_acquisition_device_system: SpikeGLX\n", - " DataAcquisitionDevice:\n", - " - data_acquisition_device_name: Neuropixels_SpikeGLX\n", - " data_acquisition_device_system: SpikeGLX\n", - " data_acquisition_device_amplifier: Intan\n", - " ```\n", - "\n", - " Only `dj.Manual`, `dj.Lookup`, and `dj.Part` tables can be populated\n", - " using this approach.\n", - "\n", - "Once the entry that you want to associate with your NWB file exists in the\n", - "database, you can write the configuration YAML file and then ingest your\n", - "NWB file. As an another example, let's say that you want to associate your NWB\n", - "file with the `DataAcquisitionDevice` entry with `data_acquisition_device_name`\n", - "= \"Neuropixels_SpikeGLX\" that was defined above. 
You would write the following\n", - "configuration YAML file:\n", - "\n", - "```yaml\n", - "DataAcquisitionDevice:\n", - "- data_acquisition_device_name: Neuropixels_SpikeGLX\n", - "```\n", - "\n", - "The example in\n", - "`examples/config_yaml/​​sub-AppleBottom_ses-AppleBottom-DY20-g3_behavior+ecephys_spyglass_config.yaml`\n", - "includes additional examples." - ] - }, - { - "cell_type": "markdown", - "id": "9d641e00", - "metadata": {}, - "source": [ - "## Example Ingestion with Real Data\n", - "\n", - "For this example, you will need to download the 5 GB NWB file \n", - "`sub-JDS-NFN-AM2_behavior+ecephys.nwb`\n", - "from dandiset 000447 here: \n", - "https://dandiarchive.org/dandiset/000447/0.230316.2133/files?location=sub-JDS-NFN-AM2&page=1\n", - "\n", - "Click the download arrow button to download the file to your computer. Add it to the folder\n", - "containing your raw NWB data to be ingested into Spyglass.\n", - "\n", - "This file does not specify a data acquisition device. Let's say that the\n", - "data was collected from a SpikeGadgets system with an Intan amplifier. This\n", - "matches an existing entry in the `DataAcquisitionDevice` table with name\n", - "\"data_acq_device0\". We will create a configuration YAML file to associate\n", - "this entry with the NWB file.\n", - "\n", - "If you are connected to the Frank lab database, please rename any downloaded\n", - "files (e.g., `example20200101_yourname.nwb`) to avoid naming collisions, as the\n", - "file name acts as the primary key across key tables." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "37aa5182", - "metadata": {}, - "outputs": [], - "source": [ - "nwb_file_name = \"sub-JDS-NFN-AM2_behavior+ecephys_rly.nwb\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "aab5b47a", - "metadata": {}, - "outputs": [], - "source": [ - "# this configuration yaml file should be placed next to the downloaded NWB file\n", - "yaml_config_path = \"sub-JDS-NFN-AM2_behavior+ecephys_rly_spyglass_config.yaml\"\n", - "with open(yaml_config_path, \"w\") as config_file:\n", - " lines = [\n", - " \"DataAcquisitionDevice\",\n", - " \"- data_acquisition_device_name: data_acq_device0\",\n", - " ]\n", - " config_file.writelines(line + \"\\n\" for line in lines)" - ] - }, - { - "cell_type": "markdown", - "id": "d132e797", - "metadata": {}, - "source": [ - "Then call `insert_sessions` as usual." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bed5c6e1", - "metadata": {}, - "outputs": [], - "source": [ - "import spyglass.data_import as sgi\n", - "\n", - "sgi.insert_sessions(nwb_file_name)" - ] - }, - { - "cell_type": "markdown", - "id": "d875b158", - "metadata": {}, - "source": [ - "Confirm the session was inserted with the correct `DataAcquisitionDevice`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8411cb43", - "metadata": {}, - "outputs": [], - "source": [ - "import spyglass.common as sgc\n", - "from spyglass.utils.nwb_helper_fn import get_nwb_copy_filename\n", - "\n", - "nwb_copy_file_name = get_nwb_copy_filename(nwb_file_name)\n", - "\n", - "sgc.Session.DataAcquisitionDevice & {\"nwb_file_name\": nwb_copy_file_name}" - ] - }, - { - "cell_type": "markdown", - "id": "d85b1416", - "metadata": {}, - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - 
"file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.18" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/10_Spike_SortingV0.ipynb b/notebooks/10_Spike_SortingV0.ipynb index d376db4b3..3497a461c 100644 --- a/notebooks/10_Spike_SortingV0.ipynb +++ b/notebooks/10_Spike_SortingV0.ipynb @@ -4,7 +4,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Spike Sorting\n" + "# Spike Sorting V0\n", + "\n", + "_Note_: This notebook explains the first version of the spike sorting pipeline\n", + "and is preserved for using existing data. New users should use\n", + "[V1](./10_Spike_SortingV1.ipynb).\n" ] }, { diff --git a/notebooks/21_DLC.ipynb b/notebooks/21_DLC.ipynb index b976ae5eb..c2e151b89 100644 --- a/notebooks/21_DLC.ipynb +++ b/notebooks/21_DLC.ipynb @@ -2042,6 +2042,41 @@ "sgp.DLCPosVideo().populate(dlc_key)" ] }, + { + "cell_type": "markdown", + "id": "04d1dca8", + "metadata": {}, + "source": [ + "
<details><summary>On editing parameters</summary>\n", + "\n", + "The presence of existing parameters in many tables makes it easy to tweak them \n", + "for your needs. You can fetch, edit, and re-insert new params - but the process\n", + "will look a little different if the table has a `=BLOB=` field.\n", + "\n", + "(These examples assume only one primary key. If multiple, `{'primary_key': 'x'}`\n", + "and `['primary_key']` will need to be adjusted accordingly.)\n", + "\n", + "No blob means that all parameters are fields in the table.\n", + "\n", + "```python\n", + "existing_params = (MyParamsTable & {'primary_key':'x'}).fetch1()\n", + "new_params = {**existing_params, 'primary_key': 'y', 'my_variable': 'a', 'other_variable':'b'}\n", + "MyParamsTable.insert1(new_params)\n", + "```\n", + "\n", + "A blob means that the params are stored as an embedded dictionary. We'll assume\n", + "this column is called `params`\n", + "\n", + "```python\n", + "existing_params = (MyParamsTable & {'primary_key':'x'}).fetch1()\n", + "new_params = {**existing_params, 'primary_key': 'y'}\n", + "print(existing_params['params']) # check existing values\n", + "new_params['params'] = {**existing_params['params'], 'my_variable': 'a', 'other_variable':'b'}\n", + "```\n", + "\n", + "</details>
" + ] + }, { "cell_type": "markdown", "id": "5a68bba8-9871-40ac-84c9-51ac0e76d44e", diff --git a/notebooks/README.md b/notebooks/README.md index 62b136240..0982c464f 100644 --- a/notebooks/README.md +++ b/notebooks/README.md @@ -1,21 +1,25 @@ # Tutorial Notebooks There are several paths one can take to these notebooks. The notebooks have -two-digits in their names, the first of which indicates it's 'batch', as +two-digits in their names, the first of which indicates its 'batch', as described in the categories below. ## 0. Intro Everyone should complete the [Setup](./00_Setup.ipynb) and -[Insert Data](./01_Insert_Data.ipynb) notebooks. +[Insert Data](./02_Insert_Data.ipynb) notebooks. The +[Concepts](./01_Concepts.ipynb) notebook offers additional information that will +help users understand the data structure and how to interact with it. -[Data Sync](./02_Data_Sync.ipynb) is an optional additional tool for +[Data Sync](./03_Data_Sync.ipynb) is an optional additional tool for collaborators that want to share analysis files. -The [Merge Tables notebook](./03_Merge_Tables.ipynb) explains details on a new -table tier unique to Spyglass that allows the user to use different versions of -pipelines on the same data. This is important for understanding the later -notebooks. +The [Merge Tables notebook](./04_Merge_Tables.ipynb) explains details on a +pipeline versioning technique unique to Spyglass. This is important for +understanding the later notebooks. + +The [Export notebook](./05_Export.ipynb) shows how to export data from the +database. ## 1. Spike Sorting Pipeline @@ -24,14 +28,14 @@ spike sorting to optional manual curation of the output of the automated sorting. Spikesorting results from any pipeline can then be organized and tracked using -tools in [Spikesorting Analysis](./11_Spike_Sorting_Agit analysis.ipynb) +tools in [Spikesorting Analysis](./11_Spikesorting_Analysis.ipynb). ## 2. 
Position Pipeline This series of notebooks covers tracking the position(s) of the animal. The user can employ two different methods: -1. the simple [Trodes](20_Position_Trodes.ipynb) methods of tracking LEDs on the +1. The simple [Trodes](20_Position_Trodes.ipynb) methods of tracking LEDs on the animal's headstage 2. [DLC (DeepLabCut)](./21_DLC.ipynb) which uses a neural network to track the animal's body parts. diff --git a/notebooks/py_scripts/00_Setup.py b/notebooks/py_scripts/00_Setup.py index 2ea726aa8..a9a7fe269 100644 --- a/notebooks/py_scripts/00_Setup.py +++ b/notebooks/py_scripts/00_Setup.py @@ -34,8 +34,13 @@ # ## Local environment # -# JupyterHub users can skip this step. Frank Lab members should first follow -# 'rec to nwb overview' steps on Google Drive to set up an ssh connection. +# Skip this step if you're ... +# +# 1. Running the tutorials on [JupyterHub](https://spyglass.hhmi.2i2c.cloud/) +# 2. A member of the Frank Lab. Instead, ssh to a shared machine. +# + +# ### Tools # # For local use, download and install ... # @@ -55,15 +60,141 @@ # 4. [git](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git) for # downloading the repository, including notebooks. # +# See [this DataJoint guide](https://datajoint.com/docs/elements/user-guide/) for +# additional details on each of these programs and the role they play in using the +# pipeline. +# +#
 Suggested VSCode settings +# +# Within the Spyglass repository, there is a `.vscode` folder with `json` files +# that specify limited settings and extensions intended for developers. The average +# user may benefit from the following fuller sets. +# +# We recommend adding these incrementally so you get a feel for what each one does +# before adding the next, and to avoid being overwhelmed by changes. +# +# 1. `extensions.json`. By updating this file, you'll add to the 'Recommended' +# section of the extensions tab. Each extension page will provide more information +# on the uses and benefits. Some relevant concepts include... +# - Linting: Warning of potential problems +# - Formatting: Auto-adjusting optional coding styles to align across users +# - Debugger: Progressive running of code. Please search for tutorials +# - Autocompletion: Prompting for potential options when coding +# +# ```json +# { +# "recommendations": [ +# // Python Extensions +# "charliermarsh.ruff", // Fast linter +# "donjayamanne.python-environment-manager", // Environment manager +# "kevinrose.vsc-python-indent", // Auto-indent when coding +# "ms-python.black-formatter", // Opinionated formatting +# "ms-python.debugpy", // Debugger +# "ms-python.isort", // Opinionated formatter for imports +# "ms-python.pylint", // Linter to support a DataJoint-specific linter +# "ms-python.python", // Language support for Python +# "ms-python.vscode-pylance", // Additional language support +# // Jupyter +# "ms-toolsai.jupyter", // Run notebooks in VSCode +# "ms-toolsai.jupyter-keymap", // Allow key-bindings +# "ms-toolsai.jupyter-renderers", // Display images +# // Autocompletion/Markdown +# "github.copilot", // Auto-suggest with copilot LLM +# "github.copilot-chat", // Add chat-box for questions to LLM +# "visualstudioexptteam.intellicode-api-usage-examples", // Prompt package options +# "visualstudioexptteam.vscodeintellicode", // Prompt Python-general options +# "davidanson.vscode-markdownlint", // Linter for
markdown +# "streetsidesoftware.code-spell-checker", // Spell checker +# // SSH - Work on remote servers - Required for Frank Lab members +# "ms-vscode-remote.remote-ssh", +# "ms-vscode-remote.remote-ssh-edit", +# "ms-vscode.remote-explorer", +# ], +# "unwantedRecommendations": [] +# } +# ``` +# +# 2. `settings.json`. These can be placed just in Spyglass, or added to your user +# settings file. Search settings in the command panel (cmd/ctrl+shift+P) to open +# this file directly. +# +# ```json +# { +# // GENERAL +# "editor.insertSpaces": true, // tab -> spaces +# "editor.rulers": [ 80 ], // vertical line at 80 +# "editor.stickyScroll.enabled": true, // Show scope at top +# "files.associations": { "*.json": "jsonc" }, // Load JSON with comments +# "files.autoSave": "onFocusChange", // Save on focus change +# "files.exclude": { // Hide these in the file viewer +# "**/__pycache*": true, // Add others with wildcards +# "**/.ipynb_ch*": true, +# }, +# "files.trimTrailingWhitespace": true, // Remove extra spaces in lines +# "git.enabled": true, // use git +# "workbench.editorAssociations": { // open file extension as given type +# "*.ipynb": "jupyter-notebook", +# }, +# // PYTHON +# "editor.defaultFormatter": "ms-python.black-formatter", // use black +# "[python]": { +# "editor.formatOnSave": true, +# "editor.defaultFormatter": "ms-python.black-formatter", +# "editor.codeActionsOnSave": { "source.organizeImports": "always"}, +# }, +# "python.analysis.autoImportCompletions": false, // Disable auto-import +# "python.languageServer": "Pylance", // Use Pylance +# "pylint.args": [ // DataJoint linter optional +# // "--load-plugins=datajoint_linter", // Requires pip installing +# // "--permit-dj-filepath=y", // Specific to datajoint_linter +# "--disable=E0401,E0102,W0621,W0401,W0611,W0614" +# ], +# // NOTEBOOKS +# "jupyter.askForKernelRestart": false, // Prevent dialog box on restart +# "jupyter.widgetScriptSources": ["jsdelivr.com", "unpkg.com"], // IPyWidgets +#
"notebook.output.textLineLimit": 15, // Limit output +# "notebook.lineNumbers": "on", // Number lines in cells +# "notebook.formatOnSave.enabled": true, // blackify cells +# // AUTOCOMPLETION +# "editor.tabCompletion": "on", // tab over suggestions +# "github.copilot.editor.enableAutoCompletions": true, // Copilot +# "cSpell.enabled": true, // Spellcheck +# "cSpell.language": "en,en-US,companies,python,python-common", +# "cSpell.maxDuplicateProblems": 2, // Only mention a problem twice +# "cSpell.spellCheckDelayMs": 500, // Wait 0.5s after save +# "cSpell.userWords": [ "datajoint", "longblob", ], // Add words +# "cSpell.enableFiletypes": [ +# "!json", "markdown", "yaml", "python" // disable (!) json, check others +# ], +# "cSpell.logLevel": "Warning", // Only show warnings, can turn off +# // MARKDOWN +# "[markdown]": { // Use linter and format on save +# "editor.defaultFormatter": "DavidAnson.vscode-markdownlint", +# "editor.formatOnSave": true, +# }, +# "editor.codeActionsOnSave": { "source.fixAll.markdownlint": "explicit" }, +# "rewrap.reformat": true, // allows context-aware rewrapping +# "rewrap.wrappingColumn": 80, // Align with Black formatter +# } +# ``` +# +# The DataJoint linter is available at +# [this repository](https://github.com/CBroz1/datajoint_linter). +# +#
+ +# +# ### Installation +# # In a terminal, ... # -# 1. navigate to your project directory. -# 2. use `git` to download the Spyglass repository. -# 3. navigate to the newly downloaded directory. -# 4. create a `mamba` environment with either the standard `environment.yml` or +# 1. Navigate to your project directory. +# 2. Use `git` to download the Spyglass repository. +# 3. Navigate to the newly downloaded directory. +# 4. Create a `mamba` environment with either the standard `environment.yml` or # the `environment_position.yml`, if you intend to use the full position # pipeline. The latter will take longer to install. -# 5. open this notebook with VSCode +# 5. Open this notebook with VSCode # # Commands for the steps above ... # @@ -75,19 +206,56 @@ # code notebooks/00_Setup.ipynb # 5 # ``` # -# _Note:_ Spyglass is also installable via -# [pip]() -# and [pypi](https://pypi.org/project/spyglass-neuro/) with -# `pip install spyglass-neuro`, but downloading from GitHub will also download -# other files. -# # Next, within VSCode, # [select the kernel](https://code.visualstudio.com/docs/datascience/jupyter-kernel-management) # that matches your spyglass environment created with `mamba`. To use other Python # interfaces, be sure to activate the environment: `conda activate spyglass` # -# See [this guide](https://datajoint.com/docs/elements/user-guide/) for additional -# details on each of these programs and the role they play in using the pipeline. +# +# ### Considerations +# +# 1. Spyglass is also installable via +# [pip]() +# and [pypi](https://pypi.org/project/spyglass-neuro/) with +# `pip install spyglass-neuro`, but downloading from GitHub will also download +# other files, like this tutorial. +# 2. Developers who wish to work on the code base may want to do an editable +# install from within their conda environment: `pip install -e /path/to/spyglass/` +# + +# ### Optional Dependencies +# +# Some pipelines require installation of additional packages. 
+# +# #### Spike Sorting +# +# The spike sorting pipeline relies on `spikeinterface` and optionally +# `mountainsort4`. +# +# ```bash +# conda activate <your-spyglass-env> +# pip install spikeinterface[full,widgets] +# pip install mountainsort4 +# ``` +# +# #### LFP +# +# The LFP pipeline uses `ghostipy`. +# +# __WARNING:__ If you are on an M1 Mac, you need to install `pyfftw` via `conda` +# BEFORE installing `ghostipy`: +# +# ```bash +# conda install -c conda-forge pyfftw # for M1 Macs +# pip install ghostipy +# ``` +# +# #### Decoding +# +# The Decoding pipeline relies on `jax` to process data with GPUs. Please see +# their conda installation steps +# [here](https://jax.readthedocs.io/en/latest/installation.html#conda-installation). +# # # ## Database @@ -97,7 +265,7 @@ # # 1. Connect to an existing database. # 2. Run your own database with [Docker](#running-your-own-database) -# 3. JupyterHub (coming soon...) +# 3. JupyterHub (database pre-configured, skip this step) # # Your choice above should result in a set of credentials, including host name, # host port, user name, and password. Note these for the next step. @@ -122,7 +290,9 @@ # Connecting to an existing database will require a user name and password. # Please contact your database administrator for this information. # -# Frank Lab members should contact Chris. +# For persistent databases with backups, administrators should review our +# documentation on +# [database management](https://lorenfranklab.github.io/spyglass/latest/ForDevelopers/Database). # # ### Running your own database with Docker @@ -159,29 +329,36 @@ # # Docker credentials are as follows: # -# - Host: localhost -# - Password: tutorial -# - User: root -# - Port: 3306 +# - Host: `localhost` +# - User: `root` +# - Password: `tutorial` +# - Port: `3306` # -# ### Config and Connecting to the database +# ### Config # -# Spyglass can load settings from either a DataJoint config file (recommended) or -# environmental variables.
The code below will generate a config file, but we -# first need to decide a 'base path'. This is generally the parent directory -# where the data will be stored, with subdirectories for `raw`, `analysis`, and -# other data folders. If they don't exist already, they will be created. +# Spyglass will load settings from the 'custom' section of your DataJoint config file. +# The code below will generate a config +# file, but we first need to decide a 'base path'. This is generally the parent +# directory where the data will be stored, with subdirectories for `raw`, +# `analysis`, and other data folders. If they don't exist already, they will be +# created relative to the base path specified with their default names. +# +# A temporary directory is one such subfolder (default `base-dir/tmp`) to speed +# up spike sorting. Ideally, this folder should have ~500GB free. # # The function below will create a config file (`~/.datajoint.config` if global, -# `./dj_local_conf.json` if local). Local is recommended for the notebooks, as +# `./dj_local_conf.json` if local). +# See also [DataJoint docs](https://datajoint.com/docs/core/datajoint-python/0.14/quick-start/#connection). +# Local is recommended for the notebooks, as # each will start by loading this file. Custom json configs can be saved elsewhere, but will need to be loaded in startup with # `dj.config.load('your-path')`. # -# To point spyglass to a folder elsewhere (e.g., an external drive for waveform -# data), simply edit the json file. Note that the `raw` and `analysis` paths -# appear under both `stores` and `custom`. +# To point Spyglass to a folder elsewhere (e.g., an external drive for waveform +# data), simply edit the resulting json file. Note that the `raw` and `analysis` paths +# appear under both `stores` and `custom`. Spyglass will check that these match +# on startup and log a warning if not.
# # + @@ -197,12 +374,49 @@ base_dir="/path/like/stelmo/nwb/", database_user="your username", database_password="your password", # remove this line for shared machines - database_host="localhost or lmf-db.cin.ucsf.edu", + database_host="localhost or lmf-db.cin.ucsf.edu", # only list one database_port=3306, set_password=False, ) # - +#
Legacy config +# +# Older versions of Spyglass relied exclusively on environment variables for +# config. If `spyglass_dirs` is not found in the config file, Spyglass will look +# for environment variables. These can be set either once in a terminal session, +# or permanently in a unix settings file (e.g., `.bashrc` or `.bash_profile`) in +# your home directory. +# +# ```bash +# export SPYGLASS_BASE_DIR="/stelmo/nwb" +# export SPYGLASS_RECORDING_DIR="$SPYGLASS_BASE_DIR/recording" +# export SPYGLASS_SORTING_DIR="$SPYGLASS_BASE_DIR/sorting" +# export SPYGLASS_VIDEO_DIR="$SPYGLASS_BASE_DIR/video" +# export SPYGLASS_WAVEFORMS_DIR="$SPYGLASS_BASE_DIR/waveforms" +# export SPYGLASS_TEMP_DIR="$SPYGLASS_BASE_DIR/tmp" +# export KACHERY_CLOUD_DIR="$SPYGLASS_BASE_DIR/.kachery-cloud" +# export KACHERY_TEMP_DIR="$SPYGLASS_BASE_DIR/tmp" +# export DJ_SUPPORT_FILEPATH_MANAGEMENT="TRUE" +# ``` +# +# To load variables from a `.bashrc` file, run `source ~/.bashrc` in a terminal. +# +#
+ +# ### Managing Files +# +# [`kachery-cloud`](https://github.com/flatironinstitute/kachery-cloud) is a file +# manager for collaborators to share files. This is an optional dependency for +# collaborating teams who don't have direct access to one another's disk space, +# but want to share a MySQL database instance. +# To customize `kachery` file paths, see `dj_local_conf_example.json`. +# +# To set up a new `kachery` instance for your project, contact maintainers +# of this package. + +# ### Connecting + # If you used either a local or global save method, we can check the connection # to the database with ... # @@ -219,16 +433,17 @@ # - # If you see an error saying `Could not find SPYGLASS_BASE_DIR`, try loading your -# config before importing Spyglass, try setting this as an environmental variable -# before importing Spyglass. +# config before importing Spyglass. # # ```python -# os.environ['SPYGLASS_BASE_DIR'] = '/your/base/path' -# -# import spyglass -# from spyglass.settings import SpyglassConfig # import datajoint as dj -# print(SpyglassConfig().config) +# dj.config.load('/your/config/path') +# +# from spyglass.common import Session +# +# Session() +# +# # If successful... 
# dj.config.save_local() # or global # ``` # @@ -236,5 +451,5 @@ # # Up Next # -# Next, we'll try [inserting data](./01_Insert_Data.ipynb) +# Next, we'll [introduce some concepts](./01_Concepts.ipynb) # diff --git a/notebooks/py_scripts/01_Concepts.py b/notebooks/py_scripts/01_Concepts.py new file mode 100644 index 000000000..f7f8ca190 --- /dev/null +++ b/notebooks/py_scripts/01_Concepts.py @@ -0,0 +1,170 @@ +# --- +# jupyter: +# jupytext: +# text_representation: +# extension: .py +# format_name: light +# format_version: '1.5' +# jupytext_version: 1.16.0 +# kernelspec: +# display_name: spy +# language: python +# name: python3 +# --- + +# # Concepts +# + +# ## Intro +# + +# _Developer Note:_ if you may make a PR in the future, be sure to copy this +# notebook, and use the `gitignore` prefix `temp` to avoid future conflicts. +# +# This is one notebook in a multi-part series on Spyglass. To set up your Spyglass environment and database, see +# [the Setup notebook](./00_Setup.ipynb) +# +# This notebook will introduce foundational concepts that will help in +# understanding how to work with Spyglass pipelines. +# + +# ## Other materials +# +# DataJoint is an "Object-relational mapping" tool, which means that it gives us +# a Python object for tables that exist on a shared SQL server. Many Spyglass +# imports are DataJoint tables like this. +# +# Any 'introduction to SQL' will give an overview of relational data models as +# a primer on how DataJoint tables within Spyglass will interact with one-another, +# and the ways we can interact with them. A quick primer may help with the +# specifics ahead. +# +# For an overview of DataJoint, including table definitions and inserts, see +# [DataJoint tutorials](https://github.com/datajoint/datajoint-tutorials). + +# ## Common Errors +# +# Skip this for now, but refer back if you hit issues.
+# +# +# ### Integrity +# +# ```console +# IntegrityError: Cannot add or update a child row: a foreign key constraint fails (`schema`.`_table`, CONSTRAINT `_table_ibfk_1` FOREIGN KEY (`parent_field`) REFERENCES `other_schema`.`parent_name` (`parent_field`) ON DELETE RESTRICT ON UPDATE CASCADE) +# ``` +# +# `IntegrityError` during `insert` means that some part of the key you're +# inserting doesn't exist in the parent of the table you're inserting into. You +# can explore which that may be by doing the following... +# +# ```python +# my_key = dict(value=key) # whatever you're inserting +# MyTable.insert1(my_key) # error here +# parents = MyTable.parents(as_objects=True) # get the parents as FreeTables +# for parent in parents: # iterate through the parents, with only relevant fields +# parent_key = {k: v for k, v in my_key.items() if k in parent.heading.names} +# print(parent & parent_key) # restricted parent +# ``` +# +# If any of the printed tables are empty, you know you need to insert into that +# table (or another ancestor up the pipeline) first. This code will not work if +# there are aliases in the table (i.e., `proj` in the definition). In that case, +# you'll need to modify your `parent_key` to reflect the renaming. +# +# The error message itself will tell you which table is the limiting parent. After +# `REFERENCES` in the error message, you'll see the parent table and the column +# that is causing the error. +# +# ### Permission +# +# ```console +# ('Insufficient privileges.', "INSERT command denied to user 'username'@'127.0.0.1' for table '_table_name'", 'INSERT INTO `schema_name`.`table_name`(`field1`,`field2`) VALUES (%s,%s)') +# ``` +# +# This is a MySQL error that means that either ... +# +# - You don't have access to the command you're trying to run (e.g., `INSERT`) +# - You don't have access to this command on the schema you're trying to run it on +# +# To see what permissions you have, you can run the following ... 
+# +# ```python +# dj.conn().query("SHOW GRANTS FOR CURRENT_USER();").fetchall() +# ``` +# +# If you think you should have access to the command, you can contact your database +# administrator (e.g., Chris in the Frank Lab). Please share the output of the +# above command with them. +# +# ### Type +# +# ```console +# TypeError: example_function() got an unexpected keyword argument 'this_arg' +# ``` +# +# This means that you're calling a function with an argument that it doesn't +# expect (e.g., `example_function(this_arg=5)`). You can check the function's +# accepted arguments by running `help(example_function)`. +# +# ```console +# TypeError: 'NoneType' object is not iterable +# ``` +# +# This means that some function is trying to do something with an object of an +# unexpected type. For example, it might be running `for item in variable: ...` +# when `variable` is `None`. You can check the type of the variable by going into +# debug mode and running `type(variable)`. +# +# ### KeyError +# +# ```console +# KeyError: 'field_name' +# ``` +# +# This means that you're trying to access a key in a dictionary that doesn't +# exist. You can check the keys of the dictionary by running `variable.keys()`. If +# this is in your custom code, you can get a key and supply a default value if it +# doesn't exist by running `variable.get('field_name', default_value)`. +# +# ### DataJoint +# +# ```console +# DataJointError("Attempt to delete part table {part} before deleting from its master {master} first.") +# ``` +# +# This means that DataJoint's delete process found a part table with a foreign key +# reference to the data you're trying to delete. You need to find the master table +# listed and delete from that table first. +# + +# ## Debug Mode +# +# To fix an error, you may want to enter 'debug mode'. VSCode has a dedicated +# featureful [extension](https://code.visualstudio.com/docs/python/debugging) +# for making use of the UI, but you can choose to use Python's built-in tool.
+# +# To enter into debug mode, you can add the following line to your code ... +# +# ```python +# __import__("pdb").set_trace() +# ``` +# +# This will set a breakpoint in your code at that line. When you run your code, it +# will pause at that line and you can explore the variables in the current frame. +# Commands in this mode include ... +# +# - `u` and `d` to move up and down the stack +# - `l` to list the code around the current line +# - `q` to quit the debugger +# - `c` to continue running the code +# - `h` for help, which will list all the commands +# +# `ipython` and jupyter notebooks can launch a debugger automatically at the last +# error by running `%debug`. +# +# + +# ## Up Next +# + +# Next, we'll try [inserting data](./02_Insert_Data.ipynb) diff --git a/notebooks/py_scripts/01_Insert_Data.py b/notebooks/py_scripts/02_Insert_Data.py similarity index 81% rename from notebooks/py_scripts/01_Insert_Data.py rename to notebooks/py_scripts/02_Insert_Data.py index f569f971f..fd1e43505 100644 --- a/notebooks/py_scripts/01_Insert_Data.py +++ b/notebooks/py_scripts/02_Insert_Data.py @@ -5,7 +5,7 @@ # extension: .py # format_name: light # format_version: '1.5' -# jupytext_version: 1.15.2 +# jupytext_version: 1.16.0 # kernelspec: # display_name: spy # language: python @@ -186,6 +186,31 @@ sgc.LabTeam.LabTeamMember() +# In general, we can insert into any table in this way, by supplying +# a dictionary (or list of dictionaries) with all the fields mentioned in +# `Table.heading.names` so long as the data types match what is described in +# `Table.heading` +# +# ```python +# Table.insert1({'a': 1, 'b': 'other'}) # only one entry +# Table.insert([{'a':1, 'b': 'other'}, {'a':1, 'b': 'next'}]) # multiple +# ``` +# +# For example ...
+ +sgc.ProbeType.insert1( + { + "probe_type": "128c-4s6mm6cm-15um-26um-sl", + "probe_description": "A Livermore flexible probe with 128 channels ...", + "manufacturer": "Lawrence Livermore National Lab", + "num_shanks": 4, + }, + skip_duplicates=True, +) + +# The `skip_duplicates` flag tells DataJoint not to raise an error if the data +# is already in the table. This should only be used in special cases. + # ## Inserting from NWB # @@ -470,6 +495,92 @@ # !ls $SPYGLASS_BASE_DIR/raw +# ## YAML Inserts +# +# The following step is an optional feature, and not required for the remaining +# notebooks. +# +# Not every NWB file has all the information required by Spyglass. For example, +# many NWB files do not contain any information about the `DataAcquisitionDevice` +# or `Probe` because NWB does not yet have an official standard for specifying +# them. Or, information in the NWB file may need correcting. +# +# Manual inserts can either be done on tables directly (e.g., +# `Table.insert1(my_dict)`), or done in batch with `yaml` files. This is done in +# two steps: +# +# 1. Generate data to be entered. +# 2. Associate data with one or more NWB files. +# + +# ### Batch Insert +# +# First, Spyglass will check for an `entries.yaml` file at the base directory +# (see [Setup](./00_Setup.ipynb)) and run all corresponding inserts. +# This is a great place to define entries that the database should auto-insert +# prior to ingesting any NWB files. An example can be found in +# `examples/config_yaml/entries.yaml`. It has the following structure: +# +# ```yaml +# TableName: +# - TableEntry1Field1: Value +# +# TableEntry1Field2: +# - TableEntry2Field1: Value +# +# TableEntry2Field2: Value +# ``` +# +# For example, +# +# ```yaml +# ProbeType: +# - probe_type: 128c-4s6mm6cm-15um-26um-sl +# probe_description: A Livermore flexible probe with 128 channels, 4 shanks, +# 6 mm shank length, 6 cm ribbon length. 
15 um contact diameter, 26 um +# center-to-center distance (pitch), single-line configuration. +# manufacturer: Lawrence Livermore National Lab +# num_shanks: 4 +# ``` +# +# Using a YAML file over data stored in Python scripts helps maintain records +# of data entries in a human-readable file. For ways to share a state of the +# database, see our [export tutorial](./05_Export.ipynb). +# + +# ### Pairing with NWBs +# +# Next, we'll need to create a _configuration file_ to associate the above entries +# with session(s). This must be done in the same directory as the NWB file that it +# configures and have the following naming convention: +# `_spyglass_config.yaml`. This file is then read by Spyglass +# when calling `insert_session` on the associated NWB file. +# +# An example of this can be found at +# `examples/config_yaml/​​sub-AppleBottom_ses-AppleBottom-DY20-g3_behavior+ecephys_spyglass_config.yaml`. +# +# This file is associated with the NWB file +# `sub-AppleBottom_ses-AppleBottom-DY20-g3_behavior+ecephys.nwb`. +# +# This is the general format for the config entry: +# +# ```yaml +# TableName: +# - primary_key1: value1 +# ``` +# +# For example: +# +# ```yaml +# DataAcquisitionDevice: +# - data_acquisition_device_name: Neuropixels Recording Device +# ``` +# +# In this example, the NWB file that corresponds to this config YAML will become +# associated with the DataAcquisitionDevice with primary key +# data_acquisition_device_name: Neuropixels Recording Device. This entry must +# already exist. 
+ # ## Up Next # diff --git a/notebooks/py_scripts/02_Data_Sync.py b/notebooks/py_scripts/03_Data_Sync.py similarity index 100% rename from notebooks/py_scripts/02_Data_Sync.py rename to notebooks/py_scripts/03_Data_Sync.py diff --git a/notebooks/py_scripts/03_Merge_Tables.py b/notebooks/py_scripts/04_Merge_Tables.py similarity index 100% rename from notebooks/py_scripts/03_Merge_Tables.py rename to notebooks/py_scripts/04_Merge_Tables.py diff --git a/notebooks/py_scripts/04_PopulateConfigFile.py b/notebooks/py_scripts/04_PopulateConfigFile.py deleted file mode 100644 index 74ec39571..000000000 --- a/notebooks/py_scripts/04_PopulateConfigFile.py +++ /dev/null @@ -1,194 +0,0 @@ -# --- -# jupyter: -# jupytext: -# text_representation: -# extension: .py -# format_name: light -# format_version: '1.5' -# jupytext_version: 1.16.0 -# kernelspec: -# display_name: Python 3 (ipykernel) -# language: python -# name: python3 -# --- - -# # Customizing Data Insertion into Spyglass -# -# If you would like to insert data into Spyglass that does not -# follow the naming or organizational format expected by Spyglass, -# or you would like to override what values are ingested into Spyglass from -# your NWB files, including missing values, follow this guide. -# -# ## General Approach -# -# When an NWB file is ingested into Spyglass, metadata about the session -# is first read from the NWB file and inserted into -# tables in the `common` module (e.g. `Institution`, `Lab`, `Electrode`, etc). -# However, not every NWB file has all the information required by Spyglass or -# the information in the NWB file is not in a format that Spyglass expects. For -# example, many NWB files do not contain information about the -# `DataAcquisitionDevice` or `Probe` because the NWB data standard does not yet -# have an official -# standard for specifying them. For these cases, we provide a way to customize -# how data is ingested into Spyglass. 
-# -# Let's say that you want to ingest an NWB file into Spyglass where the lab name -# in the file is written as "Loren Frank Lab" or it is not specified, but you -# know the data comes from the Loren Frank Lab. Let's say that in Spyglass, -# the lab name that is associated with sessions from the Loren Frank Lab is -# "Frank Lab" and you would like to use the same name in order to facilitate -# data search in Spyglass. To change the lab name when you insert your new data -# to Spyglass, you could either 1) edit the NWB file directly and then -# insert it into Spyglass, or 2) define an override value "Frank Lab" to be -# used instead of the value specified in the NWB file (or lack thereof). -# -# Note that if this is your own data and you want to make changes to -# information about how the data is interpreted, e.g., the units of measurement -# are incorrect, we recommend that you edit the NWB file directly because the -# file or derivatives of it might eventually be shared outside of Spyglass and -# they will not reflect any modifications that you have made to -# the data only in Spyglass. - -# ## Define a Configuration YAML File -# -# To override values in the NWB file during insertion into Spyglass, -# you will need to create a configuration -# YAML file that lives in the same directory as your NWB file, named: -# `_spyglass_config.yaml` -# -# An example configuration YAML file can be found at -# `examples/config_yaml/​​sub-AppleBottom_ses-AppleBottom-DY20-g3_behavior+ecephys_spyglass_config.yaml`. -# This file is associated with the NWB file -# `sub-AppleBottom_ses-AppleBottom-DY20-g3_behavior+ecephys.nwb`. 
-# -# This is the general format for entries in this configuration file: -# -# ```yaml -# TableName: -# - primary_key1: value1 -# ``` -# -# For example: -# -# ```yaml -# Lab: -# - lab_name: Frank Lab -# ``` -# -# In this example, the NWB file that corresponds to this config YAML will become -# associated with the entry in the `Lab` table with the value `Frank Lab` for -# the primary key `lab_name`. This entry must already exist. More specifically, -# when the NWB file is ingested into Spyglass, -# a new `Session` entry will be created for the NWB file that has a foreign key to -# the `Lab` entry with `lab_name` = "Frank Lab", ignoring whatever lab value is -# in the NWB file, even if one does not exist. -# -# TODO implement this for `Lab`. -# - -# ## Create Entries to Reference in the Configuration YAML -# -# As mentioned earlier, the table entry that you want to associate with your NWB -# file must already exist in the database. This entry would typically be a value -# that is independent of any particular NWB file, such as -# `DataAcquisitionDevice`, `Lab`, `Probe`, and `BrainRegion`. -# -# If the entry does not already exist, you can either: -# 1) create the entry programmatically using DataJoint `insert` commands, or -# 2) define the entry in a YAML file called `entries.yaml` that is automatically -# processed when Spyglass is imported. You can think of `entries.yaml` as a -# place to define information that the database should come pre-equipped prior -# to ingesting your NWB files. The `entries.yaml` file should be placed in the -# `spyglass` base directory (next to `README.md`). An example can be found in -# `examples/config_yaml/entries.yaml`. 
This file should have the following -# structure: -# -# ```yaml -# TableName: -# - TableEntry1Field1: Value -# TableEntry1Field2: Value -# - TableEntry2Field1: Value -# TableEntry2Field2: Value -# ``` -# -# For example, -# -# ```yaml -# DataAcquisitionDeviceSystem: -# data_acquisition_device_system: SpikeGLX -# DataAcquisitionDevice: -# - data_acquisition_device_name: Neuropixels_SpikeGLX -# data_acquisition_device_system: SpikeGLX -# data_acquisition_device_amplifier: Intan -# ``` -# -# Only `dj.Manual`, `dj.Lookup`, and `dj.Part` tables can be populated -# using this approach. -# -# Once the entry that you want to associate with your NWB file exists in the -# database, you can write the configuration YAML file and then ingest your -# NWB file. As an another example, let's say that you want to associate your NWB -# file with the `DataAcquisitionDevice` entry with `data_acquisition_device_name` -# = "Neuropixels_SpikeGLX" that was defined above. You would write the following -# configuration YAML file: -# -# ```yaml -# DataAcquisitionDevice: -# - data_acquisition_device_name: Neuropixels_SpikeGLX -# ``` -# -# The example in -# `examples/config_yaml/​​sub-AppleBottom_ses-AppleBottom-DY20-g3_behavior+ecephys_spyglass_config.yaml` -# includes additional examples. - -# ## Example Ingestion with Real Data -# -# For this example, you will need to download the 5 GB NWB file -# `sub-JDS-NFN-AM2_behavior+ecephys.nwb` -# from dandiset 000447 here: -# https://dandiarchive.org/dandiset/000447/0.230316.2133/files?location=sub-JDS-NFN-AM2&page=1 -# -# Click the download arrow button to download the file to your computer. Add it to the folder -# containing your raw NWB data to be ingested into Spyglass. -# -# This file does not specify a data acquisition device. Let's say that the -# data was collected from a SpikeGadgets system with an Intan amplifier. This -# matches an existing entry in the `DataAcquisitionDevice` table with name -# "data_acq_device0". 
We will create a configuration YAML file to associate -# this entry with the NWB file. -# -# If you are connected to the Frank lab database, please rename any downloaded -# files (e.g., `example20200101_yourname.nwb`) to avoid naming collisions, as the -# file name acts as the primary key across key tables. - -nwb_file_name = "sub-JDS-NFN-AM2_behavior+ecephys_rly.nwb" - -# this configuration yaml file should be placed next to the downloaded NWB file -yaml_config_path = "sub-JDS-NFN-AM2_behavior+ecephys_rly_spyglass_config.yaml" -with open(yaml_config_path, "w") as config_file: - lines = [ - "DataAcquisitionDevice", - "- data_acquisition_device_name: data_acq_device0", - ] - config_file.writelines(line + "\n" for line in lines) - -# Then call `insert_sessions` as usual. - -# + -import spyglass.data_import as sgi - -sgi.insert_sessions(nwb_file_name) -# - - -# Confirm the session was inserted with the correct `DataAcquisitionDevice` - -# + -import spyglass.common as sgc -from spyglass.utils.nwb_helper_fn import get_nwb_copy_filename - -nwb_copy_file_name = get_nwb_copy_filename(nwb_file_name) - -sgc.Session.DataAcquisitionDevice & {"nwb_file_name": nwb_copy_file_name} -# - - -# diff --git a/notebooks/py_scripts/10_Spike_SortingV0.py b/notebooks/py_scripts/10_Spike_SortingV0.py index 2675799db..670f46559 100644 --- a/notebooks/py_scripts/10_Spike_SortingV0.py +++ b/notebooks/py_scripts/10_Spike_SortingV0.py @@ -5,14 +5,18 @@ # extension: .py # format_name: light # format_version: '1.5' -# jupytext_version: 1.15.2 +# jupytext_version: 1.16.0 # kernelspec: # display_name: Python 3.10.5 64-bit # language: python # name: python3 # --- -# # Spike Sorting +# # Spike Sorting V0 +# +# _Note_: This notebook explains the first version of the spike sorting pipeline +# and is preserved for using existing data. New users should use +# [V1](./10_Spike_SortingV1.ipynb). 
# # ## Overview diff --git a/notebooks/py_scripts/21_DLC.py b/notebooks/py_scripts/21_DLC.py index 8a55441e8..5366c38ca 100644 --- a/notebooks/py_scripts/21_DLC.py +++ b/notebooks/py_scripts/21_DLC.py @@ -756,6 +756,35 @@ sgp.DLCPosVideo().populate(dlc_key) +#
On editing parameters +# +# The presence of existing parameters in many tables makes it easy to tweak them +# for your needs. You can fetch, edit, and re-insert new params - but the process +# will look a little different if the table has a `=BLOB=` field. +# +# (These examples assume only one primary key. If multiple, `{'primary_key': 'x'}` +# and `['primary_key']` will need to be adjusted accordingly.) +# +# No blob means that all parameters are fields in the table. +# +# ```python +# existing_params = (MyParamsTable & {'primary_key':'x'}).fetch1() +# new_params = {**existing_params, 'primary_key': 'y', 'my_variable': 'a', 'other_variable':'b'} +# MyParamsTable.insert1(new_params) +# ``` +# +# A blob means that the params are stored as an embedded dictionary. We'll assume +# this column is called `params` +# +# ```python +# existing_params = (MyParamsTable & {'primary_key':'x'}).fetch1() +# new_params = {**existing_params, 'primary_key': 'y'} +# print(existing_params['params']) # check existing values +# new_params['params'] = {**existing_params['params'], 'my_variable': 'a', 'other_variable':'b'} +# ``` +# +#
+ # #### [PositionOutput](#TableOfContents) # diff --git a/pyproject.toml b/pyproject.toml index 061947e3d..2b877597d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -127,7 +127,7 @@ minversion = "7.0" addopts = [ # "-sv", # no capture, verbose output "--sw", # stepwise: resume with next test after failure - "--pdb", # drop into debugger on failure + # "--pdb", # drop into debugger on failure "-p no:warnings", # "--no-teardown", # don't teardown the database after tests # "--quiet-spy", # don't show logging from spyglass diff --git a/src/spyglass/decoding/v1/clusterless.py b/src/spyglass/decoding/v1/clusterless.py index f9e149df1..5b128685e 100644 --- a/src/spyglass/decoding/v1/clusterless.py +++ b/src/spyglass/decoding/v1/clusterless.py @@ -446,8 +446,6 @@ def fetch_spike_data(key, filter_by_interval=True): filter_by_interval : bool, optional Whether to filter for spike times in the model interval. Default True - time_slice : Slice, optional - User provided slice of time to restrict spikes to. 
Default None Returns ------- diff --git a/src/spyglass/position/v1/dlc_utils_makevid.py b/src/spyglass/position/v1/dlc_utils_makevid.py index cc4cadc0d..12e9baeb0 100644 --- a/src/spyglass/position/v1/dlc_utils_makevid.py +++ b/src/spyglass/position/v1/dlc_utils_makevid.py @@ -119,7 +119,7 @@ def make_video(self): def _init_video(self): logger.info(f"Making video: {self.output_video_filename}") - self.video = cv2.VideoCapture(self.video_filename) + self.video = cv2.VideoCapture(str(self.video_filename)) self.frame_size = ( (int(self.video.get(3)), int(self.video.get(4))) if not self.crop diff --git a/src/spyglass/utils/dj_helper_fn.py b/src/spyglass/utils/dj_helper_fn.py index d9465fffa..9dfa6f02d 100644 --- a/src/spyglass/utils/dj_helper_fn.py +++ b/src/spyglass/utils/dj_helper_fn.py @@ -487,7 +487,7 @@ def populate_pass_function(value): Parameters ---------- value : (table, key, kwargs) - Class of table to populate, key to populate, and kwargs for populate + Class of table to populate, key to populate, and kwargs for populate """ table, key, kwargs = value return table.populate(key, **kwargs) diff --git a/src/spyglass/utils/dj_mixin.py b/src/spyglass/utils/dj_mixin.py index 6eee80e40..8481c4d30 100644 --- a/src/spyglass/utils/dj_mixin.py +++ b/src/spyglass/utils/dj_mixin.py @@ -4,6 +4,7 @@ from functools import cached_property from inspect import stack as inspect_stack from os import environ +from re import match as re_match from time import time from typing import Dict, List, Union @@ -736,7 +737,18 @@ def _spyglass_version(self): """Get Spyglass version.""" from spyglass import __version__ as sg_version - return ".".join(sg_version.split(".")[:3]) # Major.Minor.Patch + ret = ".".join(sg_version.split(".")[:3]) # Ditch commit info + + if self._test_mode: + return ret[:16] if len(ret) > 16 else ret + + if not bool(re_match(r"^\d+\.\d+\.\d+", ret)): # Major.Minor.Patch + raise ValueError( + f"Spyglass version issues. Expected #.#.#, Got {ret}." 
+ + "Please try running `hatch build` from your spyglass dir." + ) + + return ret @cached_property def _export_table(self): diff --git a/tests/position/test_dlc_cent.py b/tests/position/test_dlc_cent.py index 7980a2b30..fb3687cef 100644 --- a/tests/position/test_dlc_cent.py +++ b/tests/position/test_dlc_cent.py @@ -59,8 +59,9 @@ def test_centroid_calcs(key, sgp): df, max_LED_separation=100, points={p: p for p in points} ).centroid - assert np.all(ret[:-1] == 1), f"Centroid calculation failed for {key}" - assert np.all(np.isnan(ret[-1])), f"Centroid calculation failed for {key}" + fail_msg = f"Centroid calculation failed for {key}" + assert np.all(ret[:-1] == 1), fail_msg + assert np.all(np.isnan(ret[-1])), fail_msg def test_centroid_error(sgp): diff --git a/tests/position/test_dlc_proj.py b/tests/position/test_dlc_proj.py index 7eaba196d..0ca4bd1bb 100644 --- a/tests/position/test_dlc_proj.py +++ b/tests/position/test_dlc_proj.py @@ -57,7 +57,10 @@ def test_failed_name_insert( ), "Project re-insert did not return expected key" -def test_failed_group_insert(dlc_project_tbl, new_project_key): +@pytest.mark.usefixtures("skipif_no_dlc") +def test_failed_group_insert(no_dlc, dlc_project_tbl, new_project_key): + if no_dlc: # Decorator wasn't working here, so duplicate skipif + pytest.skip(reason="Skipping DLC-dependent tests.") with pytest.raises(ValueError): dlc_project_tbl.insert_new_project(**new_project_key)