NOAA-OWP · AminTorabi-NOAA · Nov 14, 2023
diff --git a/src/model_DAforcing.py b/src/model_DAforcing.py
@@ -6,11 +6,12 @@
 import xarray as xr
 import glob
 import pathlib
-
+import logging
 from troute.routing.fast_reach.reservoir_RFC_da import _validate_RFC_data
-
+import netCDF4
 from nwm_routing.log_level_set import log_level_set
 from troute.config import Config
+LOG = logging.getLogger('')
 
 class DAforcing_model():
 
@@ -315,13 +316,17 @@ def run(self, values: dict):
         """
         output_parameters = self._output_parameters
         da_parameters = self._data_assimilation_parameters
-
-        if output_parameters['lite_restart'] is not None:
-            _write_lite_restart(
-                values,
-                self._t0,
-                output_parameters['lite_restart']
-            )   
+
+        q0 = values.get('q0')
+        waterbody_df = values.get('waterbody_df')
+        lastobs_df = values.get('lastobs_df')
+        if len(q0) != 0 and len(waterbody_df) != 0 and len(lastobs_df) != 0:
+            if output_parameters['lite_restart'] is not None:
+                _write_lite_restart(
+                    values,
+                    self._t0,
+                    output_parameters['lite_restart']
+                )    
 
         lastobs_output_folder = da_parameters.get('streamflow_da',{}).get('lastobs_output_folder', None)
         if lastobs_output_folder:
@@ -331,9 +336,20 @@ def run(self, values: dict):
                 lastobs_output_folder
             )
 
-        if output_parameters.get('stream_output',{}).get('qvd_nudge'):
-            #write flowveldepth file
-            pass
+        # if output_parameters.get('stream_output',{}) is not None and output_parameters.get('stream_output',{}).get('qvd_nudge'):
+        if output_parameters.get('stream_output',{}):
+
+            t0 = self._t0
+            stream_output_directory =  output_parameters.get('stream_output').get('stream_output_directory')
+            stream_output_type = output_parameters.get('stream_output').get('stream_output_type')
+            stream_output_internal_frequency = output_parameters.get('stream_output').get('stream_output_internal_frequency')
+            stream_output_time = output_parameters.get('stream_output').get('stream_output_time')
+            write_flowveldepth_netcdf(values,
+                                      stream_output_directory,   
+                                      t0,
+                                      stream_output_time, 
+                                      stream_output_type,
+                                      stream_output_internal_frequency)
 
 
 def _time_stations_from_df(dataFrame, timeBase):
@@ -725,6 +741,9 @@ def _read_config_file(custom_input_file):
     data_assimilation_parameters = compute_parameters.get("data_assimilation_parameters")
     output_parameters = config_dict.get('output_parameters')
 
+    # configure python logger
+    log_level_set(config_dict.get('log_parameters'))
+
     return (
         compute_parameters,
         forcing_parameters,
@@ -976,9 +995,9 @@ def _write_lite_restart(
         waterbodies_df = pd.DataFrame(data=waterbodies_df.reshape(len(waterbodies_ids), -1), 
                                       index=waterbodies_ids,
                                       columns=['ifd','LkArea','LkMxE','OrificeA','OrificeC','OrificeE',
-                                               'WeirC','WeirE','WeirL','qd0','h0','index'])
+                                               'WeirC','WeirE','WeirL','lon', 'lat', 'crs', 'qd0','h0','index'])
         waterbodies_df.index.name = 'lake_id'
-
+        waterbodies_df.drop(columns=['lon', 'lat', 'crs'], inplace=True)
         timestamp = t0 + timedelta(seconds=values['t-route_model_time'])
 
         # create restart filenames
@@ -1037,4 +1056,161 @@ def _write_lastobs(
     if isinstance(lastobs_output_folder, pathlib.Path):
         lastobs_output_folder = str(lastobs_output_folder)
     output_path = pathlib.Path(lastobs_output_folder + "/nudgingLastObs." + modelTimeAtOutput_str + ".nc").resolve()
-    ds.to_netcdf(str(output_path))
+    ds.to_netcdf(str(output_path))
+
+def write_flowveldepth_netcdf(values,
+                              stream_output_directory,   
+                              t0, 
+                              stream_output_timediff = 1, 
+                              stream_output_type = '.nc',
+                              stream_output_internal_frequency = 5):
+    '''
+    Write the results of flowveldepth and nudge to netcdf- break. 
+    Arguments
+    -------------
+    stream_output_directory (Path or string) - directory where file will be created
+    flowveldepth (DataFrame) -  including flowrate, velocity, and depth for each time step
+    nudge (numpy.ndarray) - nudge data with shape (76, 289)
+    usgs_positions_id (array) - Position ids of usgs gages
+    '''
+    ## getting the flatten value of nudge
+    flatten_nudge = values.get('nudging')
+    nudging_ids = values.get('nudging_ids')
+    # reconstruct the df
+    num_rows_ndg = len(nudging_ids)
+    num_columns_ndg = len(flatten_nudge) // len(nudging_ids)
+    nudge = pd.DataFrame(flatten_nudge.reshape(num_rows_ndg, num_columns_ndg), index=nudging_ids)
+
+    ## getting the flatten value of flowveldepth
+    flatten_fvd = values.get('fvd_results')
+    fvd_index = values.get('fvd_index')
+    # reconstruct the df
+    num_rows_fvd = len(fvd_index)
+    num_columns_fvd = len(flatten_fvd) // len(fvd_index)
+    flowveldepth = pd.DataFrame(flatten_fvd.reshape(num_rows_fvd, num_columns_fvd), index=fvd_index)
+    # Number of timesteps and features
+    nsteps = len(flowveldepth.columns) // 3
+    num_features = len(flowveldepth)
+    nstep_nc = 12 * stream_output_timediff
+    gage, nudge_timesteps = nudge.shape
+    column_name = [(i, attr) for i in range(nsteps) for attr in ['q', 'v', 'd']]
+    flowveldepth.columns = column_name
+    #--------- Add 'nudge' column based on usgs_positions_id----------
+
+    # Create a copy of the flowveldepth DataFrame to add 'ndg' columns
+    qvd_ndg = flowveldepth.copy()
+    # Create a list for names of the columns for nudge values
+
+    ndg_columns = [(j,'ndg') for j in range(nsteps)]
+    # renaming nudge columns
+    nudge.columns = ndg_columns
+    # Left joining qvd and nudge values on index
+    qvd_ndg = pd.merge(qvd_ndg, nudge, left_index=True, right_index=True, how='left')
+
+    new_order = [(i, attr) for i in range(nsteps) for attr in ['q', 'v', 'd', 'ndg']]
+    # Reorder the columns
+    qvd_ndg = qvd_ndg[new_order]
+    # renaming the columns based on timestamp
+    column_name_timeStamp = [t0 + timedelta(minutes=(i * 5)) for i in range(nsteps)]
+    new_column_name_timeStamp = [(attr + '_' +str(cnt) + '_' + times.strftime('%Y%m%d%H%M')) for cnt, times in enumerate(column_name_timeStamp) for attr in ['q', 'v', 'd', 'ndg']]
+    qvd_ndg.columns = new_column_name_timeStamp
+
+    hr = values.get('t-route_model_time') // 3600 - 1
+    # Create time step values based on t0
+    time_steps = [t0 + timedelta(hours= hr)]
+    time_dim = [t * stream_output_internal_frequency*60 for t in range(1, int(1 * 60 / stream_output_internal_frequency) + 1)]
+
+    for counter, i in enumerate(range(0, nsteps, nstep_nc)):
+        # Define the range of columns for this file
+        start_col = i * 4
+        end_col = min((i + nstep_nc) * 4 , nsteps * 4)
+        selected_col = stream_output_internal_frequency // 5
+        # Create a subset DataFrame for the current range of columns
+        # subset_df = qvd_ndg.iloc[:, start_col:end_col]
+        # Create a list of column names to keep
+        columns_to_keep = [col for col in qvd_ndg.columns[start_col:end_col] if int(col.split('_')[1]) % selected_col == 0]
+        subset_df = qvd_ndg[columns_to_keep]
+        subset_df.columns = ['_'.join([col.split('_')[0], col.split('_')[2]]) for col in subset_df.columns]
+
+        # Create the file name based on the current time step
+        current_time_step = time_steps[counter].strftime('%Y%m%d%H%M')
+        if stream_output_directory:
+            if stream_output_type =='.nc':
+                file_name = f"{current_time_step}.flowveldepth.nc"
+
+            elif stream_output_type=='.csv':
+                file_name = f"{current_time_step}.flowveldepth.csv"
+                # Save the data to CSV file
+                subset_df.to_csv(f"{stream_output_directory}/{file_name}", index=True)
+                LOG.debug(f"Flowveldepth data saved as CSV files in {stream_output_directory}")
+
+            elif stream_output_type=='.pkl':
+                file_name = f"{current_time_step}.flowveldepth.pkl"
+                # Save the data to Pickle file
+                subset_df.to_pickle(f"{stream_output_directory}/{file_name}")
+                LOG.debug(f"Flowveldepth data saved as PICKLE files in {stream_output_directory}")
+
+            else:
+                print('WRONG FORMAT')
+
+        if stream_output_directory:
+            if (stream_output_type =='.nc'):
+                # Open netCDF4 Dataset in write mode
+                with netCDF4.Dataset(
+                    filename=f"{stream_output_directory}/{file_name}",
+                    mode='w',
+                    format='NETCDF4'
+                ) as ncfile:
+
+                    # ============ DIMENSIONS ===================
+                    _ = ncfile.createDimension('feature_id', None)
+                    _ = ncfile.createDimension('time_step (sec)', subset_df.iloc[:, 0::4].shape[1])
+                    _ = ncfile.createDimension('gage', gage)
+                    _ = ncfile.createDimension('nudge_timestep', nudge_timesteps)  # Add dimension for nudge time steps
+
+                    # =========== q,v,d,ndg VARIABLES ===============
+                    for counters, var in enumerate(['flowrate', 'velocity', 'depth', 'nudge']):
+                        QVD = ncfile.createVariable(
+                            varname=var,
+                            datatype=np.float32,
+                            dimensions=('feature_id', 'time_step (sec)',),
+                        )
+
+                        QVD.units = 'm3/s m/s m m3/s'
+                        QVD.description = f'Data for {var}'
+
+                        # Prepare data for writing
+                        data_array = subset_df.iloc[:, counters::4].to_numpy(dtype=np.float32)
+
+                        # Set data for each feature_id and time_step
+                        ncfile.variables[var][:] = data_array
+                    feature_id = ncfile.createVariable(
+                        varname='feature_id',
+                        datatype=np.int32,
+                        dimensions=('feature_id',),
+                    )
+                    feature_id[:] = flowveldepth.index.to_numpy(dtype=np.int32)
+                    feature_id.units = 'None'
+                    feature_id.description = 'Feature IDs'
+                    ###
+                    time_step = ncfile.createVariable(
+                        varname='time_step (sec)',
+                        datatype=np.int32,
+                        dimensions=('time_step (sec)',),
+                    )
+                    time_step[:] = np.array(time_dim[:subset_df.iloc[:, 0::4].shape[1]], dtype=np.int32)
+                    time_step.units = 'sec'
+                    time_step.description = 'time stamp'
+                    # =========== GLOBAL ATTRIBUTES ===============
+                    ncfile.setncatts(
+                        {
+                            'TITLE': 'OUTPUT FROM T-ROUTE',
+                            'Time step (sec)': f'{stream_output_internal_frequency}',
+                            'model_initialization_time': t0.strftime('%Y-%m-%d_%H:%M:%S'),
+                            'model_reference_time': time_steps[counter].strftime('%Y-%m-%d_%H:%M:%S'),
+                            'comment': f'The file includes {stream_output_timediff} hour data which includes {len(time_dim)} timesteps',
+                            'code_version': '',
+                        }
+                    )
+                    LOG.debug(f"Flowveldepth data saved as NetCDF files in {stream_output_directory}")
+
diff --git a/src/troute_model.py b/src/troute_model.py
@@ -96,6 +96,7 @@ def preprocess_static_vars(self, values: dict):
             compute_parameters=self._compute_parameters,
             hybrid_parameters=self._hybrid_parameters,
             preprocessing_parameters=self._preprocessing_parameters,
+            output_parameters = self._output_parameters, 
             from_files=False, value_dict=values,
             bmi_parameters=self._bmi_parameters,)
 
@@ -142,7 +143,7 @@ def run(self, values: dict, until=300):
         # Forcing values:
         if self.showtiming:
             forcing_start_time = time.time()
-
+        
         qlats_df = pd.DataFrame(values['land_surface_water_source__volume_flow_rate'],
                                 index=values['land_surface_water_source__id'])
         qlats_df = qlats_df[qlats_df.index.isin(self._network.segment_index)]
@@ -280,8 +281,8 @@ def run(self, values: dict, until=300):
         nudge = np.concatenate([r[8] for r in self._run_results])[:,1:]
         usgs_positions_id = np.concatenate([r[3][0] for r in self._run_results]).astype(int)
         self._nudge = pd.DataFrame(data=nudge, index=usgs_positions_id)
-        values['nudging'] = self._nudge
-        values['nudging_ids'] = usgs_positions_id
+        values['nudging'] = self._nudge.values.flatten()
+        values['nudging_ids'] = self._nudge.index
 
         # Get output from final timestep
         (values['channel_exit_water_x-section__volume_flow_rate'], 
@@ -381,7 +382,7 @@ def _read_config_file(custom_input_file): #TODO: Update this function, I dont' t
 
     troute_configuration = Config(**data)
     config_dict = troute_configuration.dict()
-
+    
     log_parameters = config_dict.get('log_parameters')
     compute_parameters = config_dict.get('compute_parameters')
     network_topology_parameters = config_dict.get('network_topology_parameters')