-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Initial Dataschema 6.0 * Fix tests. * Add/fix tests. * Fix readme.
- Loading branch information
1 parent
0314c04
commit d8a3483
Showing
15 changed files
with
539 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
from ..dataschema_5_1 import ( | ||
from ..dataschema_6_0 import ( | ||
DataSchema, | ||
StepDefaults, | ||
FileType, | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
from pydantic import BaseModel, Field | ||
from typing import Sequence, Optional, Mapping, Any, Literal | ||
from .step import Step | ||
from .stepdefaults import StepDefaults | ||
from .filetype import ( # noqa: F401 | ||
ExtractorFactory as ExtractorFactory, | ||
FileType as FileType, | ||
FileTypes as FileTypes, | ||
) | ||
|
||
|
||
class DataSchema(BaseModel, extra="forbid"):
    """
    A :class:`pydantic.BaseModel` implementing ``DataSchema-6.0`` model
    introduced in ``yadg-6.0``.
    """

    # Only the literal string "6.0" validates here.
    version: Literal["6.0"]

    # NOTE(review): no default is declared, so under pydantic v2 the key has to
    # be present in the input (``None`` is an accepted value) -- confirm this
    # is intended rather than a leftover from implicit-optional semantics.
    metadata: Optional[Mapping[str, Any]]
    """Input metadata for :mod:`yadg`."""

    # A fresh StepDefaults instance is built whenever the key is omitted. The
    # former positional ``...`` was redundant next to ``default_factory``.
    step_defaults: StepDefaults = Field(default_factory=StepDefaults)
    """Default values for configuration of each :class:`Step`."""

    steps: Sequence[Step]
    """Input commands for :mod:`yadg`'s extractors, organised as a :class:`Sequence`
    of :class:`Step` objects."""
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
from pydantic import BaseModel | ||
from typing import Literal, Optional, Union | ||
|
||
|
||
class ExternalDateFile(BaseModel, extra="forbid"):
    """Read external date information from file."""

    # Payload model for the ``file`` key; unknown keys are rejected.
    class Content(BaseModel, extra="forbid"):
        path: str
        """Location of the file holding the external date information."""

        type: str
        """Kind of the external date information file."""

        match: Optional[str] = None
        """Optional string that is to be matched within the file."""

    file: Content
|
||
|
||
class ExternalDateFilename(BaseModel, extra="forbid"):
    """Read external date information from the file name."""

    # Payload model for the ``filename`` key; unknown keys are rejected.
    class Content(BaseModel, extra="forbid"):
        format: str
        """Format string, ``strptime``-like, used to process the date."""

        len: int
        """How many characters, counted from the start of the filename, to parse."""

    filename: Content
|
||
|
||
class ExternalDateISOString(BaseModel, extra="forbid"):
    """Read a constant external date using an ISO-formatted string."""

    # Required; the model itself only checks that the value is a string.
    isostring: str
|
||
|
||
class ExternalDateUTSOffset(BaseModel, extra="forbid"):
    """Read a constant external date using a Unix timestamp offset."""

    # Required numeric offset.
    utsoffset: float
|
||
|
||
class ExternalDate(BaseModel, extra="forbid"):
    """Supply timestamping information that are external to the processed file."""

    # Exactly one of the four source models; each is keyed by a different
    # field name (``file``, ``filename``, ``isostring``, ``utsoffset``).
    using: Union[
        ExternalDateFile,
        ExternalDateFilename,
        ExternalDateISOString,
        ExternalDateUTSOffset,
    ]
    """Specification of the external date format."""

    mode: Literal["add", "replace"] = "add"
    """Selects whether the external timestamps are added to the parsed data or
    replace it."""
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,220 @@ | ||
import sys | ||
import inspect | ||
from pydantic import BaseModel, Field, field_validator | ||
from abc import ABC | ||
from typing import Optional, Literal, Mapping, Any, TypeVar | ||
import tzlocal | ||
from babel import Locale | ||
import logging | ||
|
||
from .stepdefaults import StepDefaults | ||
from .parameters import Timestamps, Timestamp | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
class FileType(BaseModel, ABC, extra="forbid"):
    """Template abstract base class for parser classes."""

    # Every option defaults to None; subclasses narrow ``filetype`` to a
    # Literal and may override ``parameters`` / ``encoding``.
    filetype: Optional[str] = None
    timezone: Optional[str] = None
    locale: Optional[str] = None
    encoding: Optional[str] = None
    parameters: Optional[Any] = None

    @field_validator("timezone")
    @classmethod
    def timezone_resolve_localtime(cls, v):
        # The magic value "localtime" is swapped for the local zone name
        # reported by tzlocal; anything else passes through untouched.
        return tzlocal.get_localzone_name() if v == "localtime" else v

    @field_validator("locale")
    @classmethod
    def locale_validate_default(cls, v):
        # None is left alone; other values are round-tripped through babel's
        # Locale.parse and stored as its string form.
        if v is None:
            return v
        return str(Locale.parse(v))
|
||
|
||
class Example(FileType):
    # Free-form parameter bag: no declared fields, extra keys are allowed.
    class Parameters(BaseModel, extra="allow"):
        pass

    # An empty Parameters instance is created when none is supplied.
    parameters: Parameters = Field(default_factory=Parameters)
    filetype: Literal["example"]
|
||
|
||
class Agilent_ch(FileType):
    # Accepts only ``filetype == "agilent.ch"``; all other options are
    # inherited unchanged from FileType.
    filetype: Literal["agilent.ch"]
|
||
|
||
class Agilent_dx(FileType):
    # Accepts only ``filetype == "agilent.dx"``; all other options are
    # inherited unchanged from FileType.
    filetype: Literal["agilent.dx"]
|
||
|
||
class Agilent_csv(FileType):
    # Accepts only ``filetype == "agilent.csv"``; all other options are
    # inherited unchanged from FileType.
    filetype: Literal["agilent.csv"]
|
||
|
||
class Basic_csv(FileType):
    # Options specific to this file type; unknown keys are rejected.
    class Parameters(BaseModel, extra="forbid"):
        sep: str = ","
        """Column separator of the table."""

        strip: Optional[str] = None
        """A :class:`str` of characters that are stripped from headers & data."""

        units: Optional[Mapping[str, str]] = None
        """A :class:`dict` of ``column: unit`` keypairs."""

        timestamp: Optional[Timestamps] = None
        """Timestamp specification from which the Unix timestamp of each table
        row can be calculated."""

    # A Parameters instance with the defaults above is created when omitted.
    parameters: Parameters = Field(default_factory=Parameters)
    filetype: Literal["basic.csv"]
|
||
|
||
class Drycal_csv(FileType):
    # Accepts only ``filetype == "drycal.csv"``; all other options are
    # inherited unchanged from FileType.
    filetype: Literal["drycal.csv"]
|
||
|
||
class Drycal_rtf(FileType):
    # Accepts only ``filetype == "drycal.rtf"``; all other options are
    # inherited unchanged from FileType.
    filetype: Literal["drycal.rtf"]
|
||
|
||
class Drycal_txt(FileType):
    # Accepts only ``filetype == "drycal.txt"``; all other options are
    # inherited unchanged from FileType.
    filetype: Literal["drycal.txt"]
|
||
|
||
class EClab_mpr(FileType):
    # Accepts only ``filetype == "eclab.mpr"``; all other options are
    # inherited unchanged from FileType.
    filetype: Literal["eclab.mpr"]
|
||
|
||
class EClab_mpt(FileType):
    filetype: Literal["eclab.mpt"]
    # Overrides the inherited default of None.
    encoding: Optional[str] = "windows-1252"

    @field_validator("encoding")
    @classmethod
    def set_encoding(cls, encoding):
        # Any falsy value (None, empty string) falls back to windows-1252.
        if not encoding:
            return "windows-1252"
        return encoding
|
||
|
||
class EmpaLC_csv(FileType):
    # Accepts only ``filetype == "empalc.csv"``; all other options are
    # inherited unchanged from FileType.
    filetype: Literal["empalc.csv"]
|
||
|
||
class EmpaLC_xlsx(FileType):
    # Accepts only ``filetype == "empalc.xlsx"``; all other options are
    # inherited unchanged from FileType.
    filetype: Literal["empalc.xlsx"]
|
||
|
||
class EZChrom_dat(FileType):
    # Accepts only ``filetype == "ezchrom.dat"``; all other options are
    # inherited unchanged from FileType.
    filetype: Literal["ezchrom.dat"]
|
||
|
||
class EZChrom_asc(FileType):
    filetype: Literal["ezchrom.asc"]
    # Overrides the inherited default of None.
    encoding: Optional[str] = "windows-1252"

    @field_validator("encoding")
    @classmethod
    def set_encoding(cls, encoding):
        # Any falsy value (None, empty string) falls back to windows-1252.
        if not encoding:
            return "windows-1252"
        return encoding
|
||
|
||
class FHI_csv(FileType):
    # Options specific to this file type; unknown keys are rejected.
    class Parameters(BaseModel, extra="forbid"):
        # Default: the timestamp is read from column index 0 using the
        # "%Y-%m-%d-%H-%M-%S" format.
        timestamp: Timestamps = Field(
            default=Timestamp(
                timestamp={"index": 0, "format": "%Y-%m-%d-%H-%M-%S"},
            ),
        )

    parameters: Parameters = Field(default_factory=Parameters)
    # Note that the tag is "fhimcpt.csv", not "fhi.csv".
    filetype: Literal["fhimcpt.csv"]
|
||
|
||
class FHI_vna(FileType):
    # Accepts only ``filetype == "fhimcpt.vna"``; all other options are
    # inherited unchanged from FileType.
    filetype: Literal["fhimcpt.vna"]
|
||
|
||
class Fusion_json(FileType):
    # Accepts only ``filetype == "fusion.json"``; all other options are
    # inherited unchanged from FileType.
    filetype: Literal["fusion.json"]
|
||
|
||
class Fusion_zip(FileType):
    # Accepts only ``filetype == "fusion.zip"``; all other options are
    # inherited unchanged from FileType.
    filetype: Literal["fusion.zip"]
|
||
|
||
class Fusion_csv(FileType):
    # Accepts only ``filetype == "fusion.csv"``; all other options are
    # inherited unchanged from FileType.
    filetype: Literal["fusion.csv"]
|
||
|
||
class Panalytical_xy(FileType):
    # Accepts only ``filetype == "panalytical.xy"``; all other options are
    # inherited unchanged from FileType.
    filetype: Literal["panalytical.xy"]
|
||
|
||
class Panalytical_csv(FileType):
    # Accepts only ``filetype == "panalytical.csv"``; all other options are
    # inherited unchanged from FileType.
    filetype: Literal["panalytical.csv"]
|
||
|
||
class PicoLog_tc08(FileType):
    # Accepts only ``filetype == "picolog.tc08"``; all other options are
    # inherited unchanged from FileType.
    filetype: Literal["picolog.tc08"]
|
||
|
||
class Panalytical_xrdml(FileType):
    # Accepts only ``filetype == "panalytical.xrdml"``; all other options are
    # inherited unchanged from FileType.
    filetype: Literal["panalytical.xrdml"]
|
||
|
||
class Phi_spe(FileType):
    # Accepts only ``filetype == "phi.spe"``; all other options are
    # inherited unchanged from FileType.
    filetype: Literal["phi.spe"]
|
||
|
||
class Quadstar_sac(FileType):
    # Accepts only ``filetype == "quadstar.sac"``; all other options are
    # inherited unchanged from FileType.
    filetype: Literal["quadstar.sac"]
|
||
|
||
class Tomato_json(FileType):
    # Accepts only ``filetype == "tomato.json"``; all other options are
    # inherited unchanged from FileType.
    filetype: Literal["tomato.json"]
|
||
|
||
class Touchstone_snp(FileType):
    # Accepts only ``filetype == "touchstone.snp"``; all other options are
    # inherited unchanged from FileType.
    filetype: Literal["touchstone.snp"]
|
||
|
||
# Collect every FileType subclass visible in this module (FileType itself is
# excluded) and expose them as the constraints of the ``FileTypes`` TypeVar.
classlist = []
for name, obj in inspect.getmembers(sys.modules[__name__]):
    if not inspect.isclass(obj):
        continue
    if obj is FileType or not issubclass(obj, FileType):
        continue
    classlist.append(obj)
FileTypes = TypeVar("FileTypes", *classlist)
|
||
|
||
class ExtractorFactory(BaseModel):
    """
    Extractor factory class.

    Given an ``extractor=dict(filetype=k, ...)`` argument, attempts to determine the
    correct :class:`FileType`, parses any additionally supplied parameters for that
    :class:`FileType`, and back-fills defaults such as ``timezone``, ``locale``, and
    ``encoding``.

    The following is the current usage pattern in :mod:`yadg`:

    .. code-block::

        ftype = ExtractorFactory(extractor={"filetype": k}).extractor

    """

    # Required; the concrete model is picked by the value of ``filetype``.
    extractor: FileTypes = Field(..., discriminator="filetype")

    @field_validator("extractor")
    @classmethod
    def extractor_set_defaults(cls, v):
        # Only options still set to None are back-filled, each from the
        # same-named attribute of a freshly built StepDefaults.
        fallback = StepDefaults()
        for option in ("timezone", "locale", "encoding"):
            if getattr(v, option) is None:
                setattr(v, option, getattr(fallback, option))
        return v
Oops, something went wrong.