Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

First pass on removing nan values in model from obs with colocate time #755

Draft
wants to merge 1 commit into
base: main-dev
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion pyaerocom/colocateddata.py
Original file line number Diff line number Diff line change
Expand Up @@ -526,10 +526,12 @@ def resample_time(
colocate_time = self.metadata["colocate_time"]

# if colocate time is activated, remove datapoints from model, where
# there is no observation
# there is no observation, or vice versa
if colocate_time:
mask = np.isnan(col.data[0]).data
mask_model = np.isnan(col.data[1]).data
col.data.data[1][mask] = np.nan
col.data.data[0][mask_model] = np.nan

res = TimeResampler(col.data)
data_arr = res.resample(
Expand Down
6 changes: 5 additions & 1 deletion pyaerocom/colocation.py
Original file line number Diff line number Diff line change
Expand Up @@ -522,11 +522,13 @@ def _colocate_site_data_helper_timecol(
min_num_obs=min_num_obs,
inplace=True,
)

# Save the time indices of the observations and a mask of where they are NaN
obs_idx = stat_data_ref[var_ref].index
obs_isnan = stat_data_ref[var_ref].isnull()

# Boolean mask of where the model values are NaN
mod_isnan = stat_data[var].isnull()

# now both StationData objects are in the same resolution, but they still
# might have gaps in their time axis, thus concatenate them in a DataFrame,
# which will merge the time index
Expand All @@ -538,7 +540,9 @@ def _colocate_site_data_helper_timecol(
merged = merged.interpolate("index").reindex(obs_idx).loc[obs_idx]
# Set to NaN at times when observations were NaN originally
# (because the interpolation will interpolate the 'ref' column as well)
# Likewise, set to NaN at times when the model values were NaN originally
merged.loc[obs_isnan] = np.nan
merged.loc[mod_isnan] = np.nan
# due to interpolation some model values may be NaN, where there is obs
merged.loc[merged.data.isnull()] = np.nan
# Ensure the whole timespan of the model is kept in "merged"
Expand Down
4 changes: 3 additions & 1 deletion pyaerocom/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1756,7 +1756,9 @@ def make_dummy_cube(

latdim.guess_bounds()
londim.guess_bounds()
dummy = iris.cube.Cube(np.ones((len(times), len(lats), len(lons))), units=unit)

data = np.ones((len(times), len(lats), len(lons)))
dummy = iris.cube.Cube(data, units=unit)

dummy.add_dim_coord(latdim, 1)
dummy.add_dim_coord(londim, 2)
Expand Down