Skip to content

Commit

Permalink
feat: adds data enums
Browse files Browse the repository at this point in the history
  • Loading branch information
jtyoung84 committed May 8, 2024
1 parent 91a536f commit dbb6889
Show file tree
Hide file tree
Showing 2 changed files with 106 additions and 0 deletions.
67 changes: 67 additions & 0 deletions src/aind_data_schema_models/data_name_patterns.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
"""Module for defining our data naming conventions"""

from datetime import datetime
from enum import Enum


class RegexParts(str, Enum):
    """Reusable regular expression fragments for dates and times.

    The fragments carry no ``^``/``$`` anchors, so they can be embedded
    inside larger patterns.
    """

    # Four digits, two digits, two digits, separated by hyphens.
    DATE = r"\d{4}-\d{2}-\d{2}"
    # Three two-digit fields separated by hyphens.
    TIME = r"\d{2}-\d{2}-\d{2}"


class DataRegex(str, Enum):
    """Regular expressions describing the supported data name layouts.

    The date/time patterns expose the named groups ``c_date`` and
    ``c_time``; the remaining named groups differ per pattern. Each pattern
    is assembled from adjacent string literals, one literal per
    underscore-separated field.
    """

    # <label>_<date>_<time>
    DATA = (
        "^(?P<label>.+?)"
        f"_(?P<c_date>{RegexParts.DATE.value})"
        f"_(?P<c_time>{RegexParts.TIME.value})$"
    )

    # <platform_abbreviation>_<subject_id>_<date>_<time>
    RAW = (
        "^(?P<platform_abbreviation>.+?)"
        "_(?P<subject_id>.+?)"
        f"_(?P<c_date>{RegexParts.DATE.value})"
        f"_(?P<c_time>{RegexParts.TIME.value})$"
    )

    # <input ending in date_time>_<process_name>_<date>_<time>
    # NOTE(review): unlike the other date/time patterns, this one has no
    # trailing "$", so text after the final time field is not rejected when
    # used with re.match/re.search -- confirm whether that is intentional.
    DERIVED = (
        f"^(?P<input>.+?_{RegexParts.DATE.value}_{RegexParts.TIME.value})"
        "_(?P<process_name>.+?)"
        f"_(?P<c_date>{RegexParts.DATE.value})"
        f"_(?P<c_time>{RegexParts.TIME.value})"
    )

    # <project_abbreviation>_<analysis_name>_<date>_<time>
    ANALYZED = (
        "^(?P<project_abbreviation>.+?)"
        "_(?P<analysis_name>.+?)"
        f"_(?P<c_date>{RegexParts.DATE.value})"
        f"_(?P<c_time>{RegexParts.TIME.value})$"
    )

    # One or more characters, none of which is an underscore.
    NO_UNDERSCORES = "^[^_]+$"

    # One or more characters excluding < > : ; " / | ? space and underscore.
    # NOTE(review): "\\_" reaches the regex engine as "\_", which matches an
    # underscore only; a literal backslash is therefore still allowed.
    # Confirm whether backslash was meant to be excluded as well.
    NO_SPECIAL_CHARS = '^[^<>:;"/|? \\_]+$'

    # Same exclusion set as NO_SPECIAL_CHARS, but spaces are permitted.
    NO_SPECIAL_CHARS_EXCEPT_SPACE = '^[^<>:;"/|?\\_]+$'


class DataLevel(str, Enum):
    """Names of the supported data levels.

    Members are ``str`` instances, so they compare equal to their plain
    string values.
    """

    DERIVED = "derived"
    RAW = "raw"
    SIMULATED = "simulated"


class Group(str, Enum):
    """Names of the data collection groups.

    Members are ``str`` instances, so they compare equal to their plain
    string values. Note that ``MSMA`` is upper-case while the other values
    are lower-case.
    """

    BEHAVIOR = "behavior"
    EPHYS = "ephys"
    MSMA = "MSMA"
    OPHYS = "ophys"


def datetime_to_name_string(dt: datetime) -> str:
    """Render a datetime as the ``YYYY-MM-DD_HH-MM-SS`` name suffix.

    Parameters
    ----------
    dt : datetime
        The moment to format. Only year through second are written; the
        format contains no sub-second or time zone fields.

    Returns
    -------
    str
        Date and time joined by an underscore, with hyphens separating the
        fields inside each part.
    """
    name_format = "%Y-%m-%d_%H-%M-%S"
    return dt.strftime(name_format)


def datetime_from_name_string(d: str, t: str) -> datetime:
    """Build a datetime from the date and time parts of a data name.

    Parameters
    ----------
    d : str
        Date text in ``%Y-%m-%d`` form, e.g. ``"2020-10-19"``.
    t : str
        Time text in ``%H-%M-%S`` form, e.g. ``"08-30-59"``.

    Returns
    -------
    datetime
        The combination of the parsed date and the parsed time.

    Raises
    ------
    ValueError
        If either string does not match its expected format.
    """
    # The date is parsed first, so an invalid date is reported before an
    # invalid time.
    parsed_date = datetime.strptime(d, "%Y-%m-%d").date()
    parsed_time = datetime.strptime(t, "%H-%M-%S").time()
    return datetime.combine(parsed_date, parsed_time)


def build_data_name(label: str, creation_datetime: datetime) -> str:
    """Compose a data name of the form ``<label>_<date>_<time>``.

    Parameters
    ----------
    label : str
        Leading part of the name; it is used verbatim.
    creation_datetime : datetime
        Converted to text with ``datetime_to_name_string``.

    Returns
    -------
    str
        ``label`` followed by an underscore and the formatted timestamp.
    """
    timestamp = datetime_to_name_string(creation_datetime)
    return f"{label}_{timestamp}"
39 changes: 39 additions & 0 deletions tests/test_data_name_patterns.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
"""Tests classes in data_name_patterns module"""

import unittest

from aind_data_schema_models.data_name_patterns import (
datetime_to_name_string,
datetime_from_name_string,
build_data_name,
RegexParts,
DataRegex,
DataLevel,
Group,
)


class TestRegexParts(unittest.TestCase):
    """Tests the patterns defined in the RegexParts class"""

    def test_patterns_success(self):
        """Tests that well-formed date and time strings match."""

        well_formed = (
            ("2020-10-19", RegexParts.DATE),
            ("08-30-59", RegexParts.TIME),
        )

        for text, pattern in well_formed:
            self.assertRegex(text, pattern)

    def test_patterns_fail(self):
        """Tests that malformed date and time strings do not match."""

        malformed = (
            ("10/19/2020", RegexParts.DATE),
            ("8:30:59", RegexParts.TIME),
        )

        for text, pattern in malformed:
            self.assertNotRegex(text, pattern)


# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()

0 comments on commit dbb6889

Please sign in to comment.