No-credential data download #1180

Merged
merged 12 commits on Nov 12, 2024
28 changes: 14 additions & 14 deletions .github/workflows/test-conda.yml
@@ -7,6 +7,8 @@ on:
- '!documentation'
schedule: # once a day at midnight UTC
- cron: '0 0 * * *'
pull_request: # requires approval for first-time contributors
types: [synchronize, opened, reopened, labeled]
workflow_dispatch: # Manually trigger with 'Run workflow' button

concurrency: # Replace Cancel Workflow Action
@@ -22,8 +24,6 @@ jobs:
env:
OS: ubuntu-latest
PYTHON: '3.9'
UCSF_BOX_TOKEN: ${{ secrets.UCSF_BOX_TOKEN }} # for download and testing
UCSF_BOX_USER: ${{ secrets.UCSF_BOX_USER }}
services:
mysql:
image: datajoint/mysql:8.0
@@ -57,23 +57,23 @@
pip install --quiet .[test]
- name: Download data
env:
BASEURL: ftps://ftp.box.com/trodes_to_nwb_test_data/
NWBFILE: minirec20230622.nwb # Relative to Base URL
VID_ONE: 20230622_sample_01_a1/20230622_sample_01_a1.1.h264
VID_TWO: 20230622_sample_02_a1/20230622_sample_02_a1.1.h264
BASEURL: https://ucsf.box.com/shared/static/
NWB_URL: k3sgql6z475oia848q1rgms4zdh4rkjn.nwb
VID1URL: ykep8ek4ogad20wz4p0vuyuqfo60cv3w.h264
VID2URL: d2jjk0y565ru75xqojio3hymmehzr5he.h264
NWBFILE: minirec20230622.nwb
VID_ONE: 20230622_minirec_01_s1.1.h264
VID_TWO: 20230622_minirec_02_s2.1.h264
RAW_DIR: /home/runner/work/spyglass/spyglass/tests/_data/raw/
VID_DIR: /home/runner/work/spyglass/spyglass/tests/_data/video/
run: |
mkdir -p $RAW_DIR $VID_DIR
wget_opts() { # Declare func with download options
wget \
--recursive --no-verbose --no-host-directories --no-directories \
--user "$UCSF_BOX_USER" --password "$UCSF_BOX_TOKEN" \
-P "$1" "$BASEURL""$2"
curl_opts() { # Declare func with download options
curl -L --output "$1""$2" "$BASEURL""$3"
}
wget_opts $RAW_DIR $NWBFILE
wget_opts $VID_DIR $VID_ONE
wget_opts $VID_DIR $VID_TWO
curl_opts $RAW_DIR $NWBFILE $NWB_URL
curl_opts $VID_DIR $VID_ONE $VID1URL
curl_opts $VID_DIR $VID_TWO $VID2URL
- name: Run tests
run: |
pytest --no-docker --no-dlc
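For local runs outside CI, a minimal Python sketch of the same credential-free approach (URLs and target names copied from the workflow step above; the helper function itself is hypothetical, not repo code):

from pathlib import Path
from urllib.request import urlretrieve

BASE_URL = "https://ucsf.box.com/shared/static/"

def fetch_test_file(url_id: str, dest_dir: str, target_name: str) -> Path:
    """Download one public Box static shared link into dest_dir/target_name."""
    dest = Path(dest_dir)
    dest.mkdir(parents=True, exist_ok=True)
    out = dest / target_name
    if not out.exists():  # skip files already present
        urlretrieve(BASE_URL + url_id, str(out))  # public link, no credentials needed
    return out

# The same three files the workflow step downloads:
fetch_test_file("k3sgql6z475oia848q1rgms4zdh4rkjn.nwb", "tests/_data/raw", "minirec20230622.nwb")
fetch_test_file("ykep8ek4ogad20wz4p0vuyuqfo60cv3w.h264", "tests/_data/video", "20230622_minirec_01_s1.1.h264")
fetch_test_file("d2jjk0y565ru75xqojio3hymmehzr5he.h264", "tests/_data/video", "20230622_minirec_02_s2.1.h264")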
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -37,6 +37,7 @@ dj.FreeTable(dj.conn(), "common_session.session_group").drop()
- Remove debug statement #1164
- Add testing for python versions 3.9, 3.10, 3.11, 3.12 #1169
- Initialize tables in pytests #1181
- Download test data without credentials, trigger on approved PRs #1180
- Allow python \< 3.13 #1169
- Remove numpy version restriction #1169
- Merge table delete removes orphaned master entries #1164
6 changes: 0 additions & 6 deletions tests/README.md
@@ -2,12 +2,6 @@

## Environment

To allow pytest helpers to automatically download requisite data, you'll need to
set credentials for Box. Consider adding these to a private `.env` file.

- `UCSF_BOX_USER`: UCSF email address
- `UCSF_BOX_TOKEN`: Token generated from UCSF Box account

To facilitate headless testing of various Qt-based tools as well as Tensorflow,
`pyproject.toml` includes some environment variables associated with the
display. These are...
1 change: 0 additions & 1 deletion tests/conftest.py
@@ -108,7 +108,6 @@ def pytest_configure(config):
)

DOWNLOADS = DataDownloader(
nwb_file_name=TEST_FILE,
base_dir=BASE_DIR,
verbose=VERBOSE,
download_dlc=not NO_DLC,
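For context, a minimal usage sketch of the credential-free downloader as conftest sets it up (the import path and literal values here are illustrative, not exact repo code):

from data_downloader import DataDownloader  # tests/data_downloader.py

DOWNLOADS = DataDownloader(
    base_dir="tests/_data",   # BASE_DIR in conftest
    verbose=False,            # VERBOSE
    download_dlc=False,       # not NO_DLC
)
# Downloads start immediately as background curl subprocesses;
# block until a specific target has finished before using it:
DOWNLOADS.wait_for("minirec20230622.nwb")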
89 changes: 29 additions & 60 deletions tests/data_downloader.py
@@ -1,5 +1,4 @@
from functools import cached_property
from os import environ as os_environ
from pathlib import Path
from shutil import copy as shutil_copy
from subprocess import DEVNULL, Popen
@@ -9,135 +8,105 @@

from datajoint import logger as dj_logger

UCSF_BOX_USER = os_environ.get("UCSF_BOX_USER")
UCSF_BOX_TOKEN = os_environ.get("UCSF_BOX_TOKEN")
BASE_URL = "ftps://ftp.box.com/trodes_to_nwb_test_data/"
BASE_URL = "https://ucsf.box.com/shared/static/"

NON_DLC = 3 # First N items below are not for DeepLabCut
FILE_PATHS = [
{
"relative_dir": "raw",
"target_name": "minirec20230622.nwb",
"url": BASE_URL + "minirec20230622.nwb",
"url": BASE_URL + "k3sgql6z475oia848q1rgms4zdh4rkjn.nwb",
},
{
"relative_dir": "video",
"target_name": "20230622_minirec_01_s1.1.h264",
"url": BASE_URL + "20230622_sample_01_a1/20230622_sample_01_a1.1.h264",
"url": BASE_URL + "ykep8ek4ogad20wz4p0vuyuqfo60cv3w.h264",
},
{
"relative_dir": "video",
"target_name": "20230622_minirec_02_s2.1.h264",
"url": BASE_URL + "20230622_sample_02_a1/20230622_sample_02_a1.1.h264",
"url": BASE_URL + "d2jjk0y565ru75xqojio3hymmehzr5he.h264",
},
{
"relative_dir": "deeplabcut",
"target_name": "CollectedData_sc_eb.csv",
"url": BASE_URL + "minirec_dlc_items/CollectedData_sc_eb.csv",
"url": BASE_URL + "3nzqdfty51vrga7470rn2vayrtoor3ot.csv",
},
{
"relative_dir": "deeplabcut",
"target_name": "CollectedData_sc_eb.h5",
"url": BASE_URL + "minirec_dlc_items/CollectedData_sc_eb.h5",
"url": BASE_URL + "sx30rqljppeisi4jdyu53y51na0q9rff.h5",
},
{
"relative_dir": "deeplabcut",
"target_name": "img000.png",
"url": BASE_URL + "minirec_dlc_items/img000.png",
"url": BASE_URL + "wrvgncfbpjuzfhopkfaizzs069tb1ruu.png",
},
{
"relative_dir": "deeplabcut",
"target_name": "img001.png",
"url": BASE_URL + "minirec_dlc_items/img001.png",
"url": BASE_URL + "czbkxeinemat7jj7j0877pcosfqo9psh.png",
},
]
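For orientation, a minimal sketch (not part of the module) of how one FILE_PATHS entry is resolved and fetched, mirroring the curl call used in file_downloads below; the base directory here is illustrative:

from pathlib import Path
from subprocess import Popen

entry = FILE_PATHS[0]  # the raw NWB file
dest = Path("tests/_data") / entry["relative_dir"] / entry["target_name"]
dest.parent.mkdir(parents=True, exist_ok=True)
# Box "shared/static" links are public, so no user or token is needed:
Popen(["curl", "-L", "--output", str(dest), entry["url"]]).wait()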


class DataDownloader:
def __init__(
self,
nwb_file_name,
file_paths=FILE_PATHS,
base_dir=".",
download_dlc=True,
verbose=True,
):
if not all([UCSF_BOX_USER, UCSF_BOX_TOKEN]):
raise ValueError(
"Missing os.environ credentials: UCSF_BOX_USER, UCSF_BOX_TOKEN."
)
if nwb_file_name != file_paths[0]["target_name"]:
raise ValueError(
f"Please adjust data_downloader.py to match: {nwb_file_name}"
)

self.cmd = [
"wget",
"--recursive",
"--no-host-directories",
"--no-directories",
"--user",
UCSF_BOX_USER,
"--password",
UCSF_BOX_TOKEN,
"-P", # Then need relative path, then url
]

self.verbose = verbose
if not verbose:
self.cmd.insert(self.cmd.index("--recursive") + 1, "--no-verbose")
self.cmd_kwargs = dict(stdout=DEVNULL, stderr=DEVNULL)
else:
if verbose:
self.cmd_kwargs = dict(stdout=stdout, stderr=stderr)
else:
self.cmd_kwargs = dict(stdout=DEVNULL, stderr=DEVNULL)

self.base_dir = Path(base_dir).resolve()
self.verbose = verbose
self.base_dir = Path(base_dir).expanduser().resolve()
self.download_dlc = download_dlc
self.file_paths = file_paths if download_dlc else file_paths[:NON_DLC]
self.base_dir.mkdir(exist_ok=True)

# Start downloads
_ = self.file_downloads

def rename_files(self):
"""Redundant, but allows rerun later in startup process of conftest."""
for path in self.file_paths:
target, url = path["target_name"], path["url"]
target_dir = self.base_dir / path["relative_dir"]
orig = target_dir / url.split("/")[-1]
dest = target_dir / target

if orig.exists():
orig.rename(dest)

@cached_property # Only make list of processes once
def file_downloads(self) -> Dict[str, Union[Popen, None]]:
"""{File: POpen/None} for each file. If exists/finished, None."""
ret = dict()
self.rename_files()
for path in self.file_paths:
target, url = path["target_name"], path["url"]
target_dir = self.base_dir / path["relative_dir"]
target_dir.mkdir(exist_ok=True, parents=True)

target = path["target_name"]
dest = target_dir / target
cmd = (
["echo", f"Already have {target}"]
if dest.exists()
else self.cmd + [target_dir, url]
)

if dest.exists():
cmd = ["echo", f"Already have {target}"]
else:
cmd = ["curl", "-L", "--output", str(dest), f"{path['url']}"]

print(f"cmd: {cmd}")

ret[target] = Popen(cmd, **self.cmd_kwargs)

return ret

def wait_for(self, target: str):
"""Wait for target to finish downloading."""
status = self.file_downloads.get(target).poll()

limit = 10
while status is None and limit > 0:
time_sleep(5) # Some
time_sleep(5)
limit -= 1
status = self.file_downloads.get(target).poll()
if status != 0:

if status != 0: # Error downloading
raise ValueError(f"Error downloading: {target}")
if limit < 1:
if limit < 1: # Reached attempt limit
raise TimeoutError(f"Timeout downloading: {target}")

def move_dlc_items(self, dest_dir: Path):