Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding in new function #845

Merged
merged 13 commits into from
Jul 25, 2024
109 changes: 109 additions & 0 deletions act/utils/data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1410,3 +1410,112 @@ def calculate_percentages(ds, fields, time=None, time_slice=None, threshold=None
percentages[i] = j
ds_percent.close()
return percentages


def convert_2d_to_1d(ds_object, parse=None, variables=None, keep_name_if_one=False,
ajsockol marked this conversation as resolved.
Show resolved Hide resolved
use_dim_value_in_name=False, dim_labels=None):
"""
Function to convert a single 2D variable into multiple 1D
variables using the second dimension in the new variable name.

...
ajsockol marked this conversation as resolved.
Show resolved Hide resolved


Parameters
----------
ds_object: xarray.dataset
Object containing 2D variable to be converted
parse: str or None
Coordinate dimension name to parse along. If set to None will
guess the non-time dimension is the parse dimension.
variables: str or list of str
Variable name or names to parse. If not provided will attempt to
parse all two dimensional variables with the parse coordinate
dimension.
keep_name_if_one: boolean
Option to not modify the variable name if the coordinate dimension
has only one value. Essentially converting a 2D (i.e. (100,1)
variable into a 1D variable (i.e. (100)).
use_dim_value_in_name: boolean
Option to use value from the coordinate dimension in new variable
name instead of indexing number. Will use the value prepended
to the units of the dimension.
dim_labels: str or list of str
Allows for use of custom label to append to end of variable names

ajsockol marked this conversation as resolved.
Show resolved Hide resolved

Returns
-------
A new object copied from input object with the multi-dimensional
variable split into multiple single-dimensional variables.


ajsockol marked this conversation as resolved.
Show resolved Hide resolved
Example
-------
# This will get the name of the coordinate dimension so it does not need to
# be hard coded.
>>> parse_dim = (list(set(list(ds_object.dims)) - set(['time'])))[0]

# Now use the parse_dim name to parse the variable and return new object.
>>> new_ds_object = convert_2d_to_1d(ds_object, parse=parse_dim)

"""

# If no parse dimension name given assume it is the one not equal to 'time'
if parse is None:
parse = (list(set(list(ds_object.dims)) - set(['time'])))[0]

new_ds_object = ds_object.copy()
ajsockol marked this conversation as resolved.
Show resolved Hide resolved

if variables is not None and isinstance(variables, str):
variables = [variables]

if variables is None:
variables = list(new_ds_object.variables)

if dim_labels is not None and isinstance(dim_labels, (str, )):
dim_labels = [dim_labels]

# Check if we want to keep the names the same if the second dimension
# is of size one.
num_dims = 1
if keep_name_if_one:
num_dims = 2

parse_values = ds_object[parse].values
for var in variables:
if var == parse:
continue
# Check if the parse dimension is in the dimension tuple
if parse in new_ds_object[var].dims:
if len((new_ds_object[parse])) >= num_dims:
for i in range(0, new_ds_object.sizes[parse]):
if (dim_labels is not None):
new_var_name = '_'.join([var, dim_labels[i]])
elif use_dim_value_in_name:
level = str(parse_values[i]) + ds_object[parse].attrs['units']
new_var_name = '_'.join([var, parse, level])
else:
new_var_name = '_'.join([var, parse, str(i)])
new_var = new_ds_object[var].copy()
new_ds_object[new_var_name] = new_var.isel(indexers={parse: i})

try:
ancillary_variables = new_ds_object[new_var_name].attrs['ancillary_variables']
current_qc_var_name = ds_object.qcfilter.check_for_ancillary_qc(
var, add_if_missing=False)
if current_qc_var_name is not None:
ancillary_variables = ancillary_variables.replace(
current_qc_var_name, 'qc_' + new_var_name)
new_ds_object[new_var_name].attrs['ancillary_variables'] = ancillary_variables
except KeyError:
pass

# Remove the old 2D variable after extracting
del new_ds_object[var]

else:
# Keep the same name but remove the dimension equal to size 1
new_ds_object[var] = new_ds_object[var].squeeze(dim=parse)

return new_ds_object
Loading