Commit

Merge branch 'master' into bmi_run_V4
AminTorabi-NOAA authored Nov 24, 2023
2 parents 7b76c4b + 8286c2b commit ff8474c
Showing 64 changed files with 659 additions and 728 deletions.
23 changes: 21 additions & 2 deletions .github/workflows/ubuntu-latest.yml
@@ -47,7 +47,26 @@ jobs:
run: |
./compiler.sh no-e
- name: Run Test
- name: Run V3 Test
run: |
cd test/LowerColorado_TX
python -m nwm_routing -f -V3 test_AnA.yaml
cd ../..
- name: Run V4 Test on NHD
run: |
cd test/LowerColorado_TX
python -m nwm_routing -f -V4 test_AnA_V4_NHD.yaml
cd ../..
- name: Run V4 Test on HYFeature
run: |
cd test/LowerColorado_TX_v4
python -m nwm_routing -f -V4 test_AnA_V4_HYFeature.yaml
cd ../..
- name: Run V4 Test on HYFeature without DA
run: |
cd test/LowerColorado_TX_v4
python -m nwm_routing -f -V4 test_AnA_V4_HYFeature_noDA.yaml
cd ../..
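For local debugging, the same four test runs can be reproduced outside of CI. A minimal sketch, assuming a t-route checkout in which ./compiler.sh no-e has already been run and the script is launched from the repository root:

# Local reproduction of the CI steps above; each entry mirrors one
# workflow step (working directory, version flag, config yaml).
import subprocess

tests = [
    ("test/LowerColorado_TX",    ["-V3", "test_AnA.yaml"]),
    ("test/LowerColorado_TX",    ["-V4", "test_AnA_V4_NHD.yaml"]),
    ("test/LowerColorado_TX_v4", ["-V4", "test_AnA_V4_HYFeature.yaml"]),
    ("test/LowerColorado_TX_v4", ["-V4", "test_AnA_V4_HYFeature_noDA.yaml"]),
]
for cwd, args in tests:
    # same invocation as the workflow steps, run from each test directory
    subprocess.run(["python", "-m", "nwm_routing", "-f", *args], cwd=cwd, check=True)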
20 changes: 20 additions & 0 deletions src/troute-config/troute/config/config.py
@@ -214,3 +214,23 @@ def check_start_datetime(cls, values):

return values

@root_validator(skip_on_failure=True)
def check_flowpath_edge_list(cls, values):
geo_file_path = values['network_topology_parameters'].supernetwork_parameters.geo_file_path
flowpath_edge_list = values['network_topology_parameters'].supernetwork_parameters.flowpath_edge_list
if Path(geo_file_path).suffix=='.json':
assert flowpath_edge_list, "geo_file_path is json, but no flowpath_edge_list is provided."
assert Path(flowpath_edge_list).suffix=='.json', "geo_file_path is json, but flowpath_edge_list is a different file type."

return values

@root_validator(skip_on_failure=True)
def check_lite_restart_directory(cls, values):
if values['output_parameters']:
lite_restart = values['output_parameters'].lite_restart
if lite_restart is not None:
lite_restart_directory = lite_restart.lite_restart_output_directory
assert lite_restart_directory, "lite_restart is present in output parameters, but no lite_restart_output_directory is provided."

return values
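Both new validators follow the same pattern: a pydantic v1 root_validator that inspects the assembled values and asserts a cross-field invariant. A minimal standalone sketch of that pattern on a toy model (the class and field names here are illustrative, not the real troute Config classes):

from pathlib import Path
from typing import Optional

from pydantic import BaseModel, root_validator  # pydantic v1 API, as above


class ToyNetwork(BaseModel):
    geo_file_path: str
    flowpath_edge_list: Optional[str] = None

    @root_validator(skip_on_failure=True)
    def check_flowpath_edge_list(cls, values):
        # a .json geo file carries no usable topology on its own,
        # so the edge list becomes mandatory
        if Path(values['geo_file_path']).suffix == '.json':
            assert values.get('flowpath_edge_list'), \
                "geo_file_path is json, but no flowpath_edge_list is provided."
        return values


ToyNetwork(geo_file_path='flowpaths.gpkg')                        # OK: gpkg needs no edge list
ToyNetwork(geo_file_path='fp.json', flowpath_edge_list='e.json')  # OK
# ToyNetwork(geo_file_path='fp.json')  # raises ValidationError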

@@ -36,6 +36,7 @@ class SupernetworkParameters(BaseModel, extra='forbid'):
# TODO: hopefully places in the code can be changed so this is a `Path` instead of a `str`
geo_file_path: str
network_type: Literal["HYFeaturesNetwork", "NHDNetwork"] = "HYFeaturesNetwork"
flowpath_edge_list: Optional[str] = None
mask_file_path: Optional[FilePath] = None
mask_layer_string: str = ""
# TODO: determine if this is still used
@@ -121,21 +122,21 @@ class Columns(BaseModel, extra='forbid'):
# string, channel bottom width
bw: str
# string, waterbody identifier
waterbody: str
waterbody: Optional[str]
# string, channel top width
tw: str
# string, compound channel top width
twcc: str
# string, channel bottom altitude
alt: str
alt: Optional[str]
# string, muskingum K parameter
musk: str
# string, muskingum X parameter
musx: str
# string, channel sideslope
cs: str
# string, gage ID
gages: str
gages: Optional[str]


class WaterbodyParameters(BaseModel, extra='forbid'):
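Taken together with the new flowpath_edge_list field above, a JSON-based network is now configured with both files, and the waterbody, alt, and gages column names may be omitted for networks whose attribute tables lack them. A hypothetical fragment (keys mirror the SupernetworkParameters model; file names are placeholders):

# Hypothetical supernetwork_parameters fragment exercising the new field.
supernetwork_parameters = {
    "geo_file_path": "lower_colorado_flowpaths.json",
    "flowpath_edge_list": "lower_colorado_edge_list.json",  # required when geo_file_path is .json
    "network_type": "HYFeaturesNetwork",
}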
91 changes: 49 additions & 42 deletions src/troute-network/troute/HYFeaturesNetwork.py
@@ -14,6 +14,7 @@

import troute.nhd_io as nhd_io #FIXME
from troute.nhd_network import reverse_dict, extract_connections, reverse_network, reachable
from .rfc_lake_gage_crosswalk import get_rfc_lake_gage_crosswalk

__verbose__ = False
__showtiming__ = False
@@ -70,24 +71,29 @@ def read_json(file_path, edge_list):
with open(edge_list) as edge_file:
edge_data = json.load(edge_file)
edge_map = {}
wb_id, toid = edge_data[0].keys()
for id_dict in edge_data:
edge_map[ id_dict['id'] ] = id_dict['toid']
edge_map[ id_dict[wb_id] ] = id_dict[toid]
with open(file_path) as data_file:
json_data = json.load(data_file)
for key_wb, value_params in json_data.items():
df = pd.json_normalize(value_params)
df['id'] = key_wb
df['toid'] = edge_map[key_wb]
df[wb_id] = key_wb
df[toid] = edge_map[key_wb]
dfs.append(df)
df_main = pd.concat(dfs, ignore_index=True)

return df_main
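The rewritten read_json no longer hard-codes the 'id'/'toid' keys: it takes whatever two keys appear in the first edge record, relying on dicts preserving insertion order. A quick sketch, with illustrative key names:

# Whatever the two keys are called in this particular edge list, the
# first record names them; 'id'/'toid' here are illustrative, not required.
edge_data = [
    {"id": "wb-1", "toid": "nex-2"},
    {"id": "wb-3", "toid": "nex-2"},
]
wb_id, toid = edge_data[0].keys()                   # -> 'id', 'toid'
edge_map = {d[wb_id]: d[toid] for d in edge_data}
assert edge_map == {"wb-1": "nex-2", "wb-3": "nex-2"}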

def read_geojson(file_path):
flowpaths = gpd.read_file(file_path)
return flowpaths

def numeric_id(flowpath):
id = flowpath['id'].split('-')[-1]
toid = flowpath['toid'].split('-')[-1]
flowpath['id'] = int(float(id))
flowpath['toid'] = int(float(toid))
id = flowpath['key'].split('-')[-1]
toid = flowpath['downstream'].split('-')[-1]
flowpath['key'] = int(float(id))
flowpath['downstream'] = int(float(toid))
return flowpath
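numeric_id now operates on the renamed 'key'/'downstream' fields (previously 'id'/'toid'), keeping only the numeric tail of prefixed identifiers. A self-contained check of the same logic:

import pandas as pd

def numeric_id(flowpath):
    # same logic as above: strip the 'wb-'/'nex-' style prefix, keep the number
    key = flowpath['key'].split('-')[-1]
    downstream = flowpath['downstream'].split('-')[-1]
    flowpath['key'] = int(float(key))
    flowpath['downstream'] = int(float(downstream))
    return flowpath

row = pd.Series({'key': 'wb-1234', 'downstream': 'nex-5678'})
assert numeric_id(row).tolist() == [1234, 5678]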

def read_ngen_waterbody_df(parm_file, lake_index_field="wb-id", lake_id_mask=None):
@@ -141,19 +147,19 @@ def node_key_func(x):
def read_geo_file(supernetwork_parameters, waterbody_parameters, data_assimilation_parameters, cpu_pool):

geo_file_path = supernetwork_parameters["geo_file_path"]
flowpaths = lakes = network = pd.DataFrame()

file_type = Path(geo_file_path).suffix
if( file_type == '.gpkg' ):
if(file_type=='.gpkg'):
flowpaths, lakes, network = read_geopkg(geo_file_path,
data_assimilation_parameters,
waterbody_parameters,
cpu_pool)
#TODO Do we need to keep .json as an option?
'''
elif( file_type == '.json') :
edge_list = supernetwork_parameters['flowpath_edge_list']
self._dataframe = read_json(geo_file_path, edge_list)
'''
elif(file_type == '.json'):
edge_list = supernetwork_parameters['flowpath_edge_list']
flowpaths = read_json(geo_file_path, edge_list)
elif(file_type=='.geojson'):
flowpaths = read_geojson(geo_file_path)
else:
raise RuntimeError("Unsupported file type: {}".format(file_type))
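read_geo_file now initializes its outputs to empty DataFrames and dispatches purely on the file suffix, re-enabling the previously commented-out .json branch and adding a .geojson branch. A compact sketch of that dispatch (reader names refer to the functions above; this is an illustration, not the real API):

from pathlib import Path

def pick_reader(geo_file_path):
    # mirror of the suffix dispatch above; '.json' additionally needs
    # supernetwork_parameters['flowpath_edge_list']
    table = {'.gpkg': 'read_geopkg', '.json': 'read_json', '.geojson': 'read_geojson'}
    suffix = Path(geo_file_path).suffix
    if suffix not in table:
        raise RuntimeError("Unsupported file type: {}".format(suffix))
    return table[suffix]

assert pick_reader('flowpaths.geojson') == 'read_geojson'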

@@ -319,31 +325,11 @@ def waterbody_null(self):

def preprocess_network(self, flowpaths):
self._dataframe = flowpaths

# Don't need the string prefix anymore, drop it
mask = ~ self.dataframe['toid'].str.startswith("tnex")
self._dataframe = self.dataframe.apply(numeric_id, axis=1)

# handle segment IDs that are also waterbody IDs. The fix here adds a large value
# to the segment IDs, creating new, unique IDs. Otherwise our connections dictionary
# will get confused because there will be repeat IDs...
duplicate_wb_segments = self.supernetwork_parameters.get("duplicate_wb_segments", None)
duplicate_wb_id_offset = self.supernetwork_parameters.get("duplicate_wb_id_offset", 9.99e11)
if duplicate_wb_segments:
# update the values of the duplicate segment IDs
fix_idx = self.dataframe.id.isin(set(duplicate_wb_segments))
self._dataframe.loc[fix_idx,"id"] = (self.dataframe[fix_idx].id + duplicate_wb_id_offset).astype("int64")

# make the flowpath linkage, ignore the terminal nexus
self._flowpath_dict = dict(zip(self.dataframe.loc[mask].toid, self.dataframe.loc[mask].id))

# ********** need to be included in flowpath_attributes *************
self._dataframe['alt'] = 1.0 #FIXME get the right value for this...

cols = self.supernetwork_parameters.get('columns',None)

cols = self.supernetwork_parameters.get('columns', None)
if cols:
self._dataframe = self.dataframe[list(cols.values())]
col_idx = list(set(cols.values()).intersection(set(self.dataframe.columns)))
self._dataframe = self.dataframe[col_idx]
# Rename parameter columns to standard names: from route-link names
# key: "link"
# downstream: "to"
@@ -361,8 +347,30 @@ def preprocess_network(self, flowpaths):
# musx: "MusX"
# cs: "ChSlp" # TODO: rename to `sideslope`
self._dataframe = self.dataframe.rename(columns=reverse_dict(cols))
self._dataframe.set_index("key", inplace=True)
self._dataframe = self.dataframe.sort_index()

# Don't need the string prefix anymore, drop it
mask = ~ self.dataframe['downstream'].str.startswith("tnx")
self._dataframe = self.dataframe.apply(numeric_id, axis=1)

# handle segment IDs that are also waterbody IDs. The fix here adds a large value
# to the segment IDs, creating new, unique IDs. Otherwise our connections dictionary
# will get confused because there will be repeat IDs...
duplicate_wb_segments = self.supernetwork_parameters.get("duplicate_wb_segments", None)
duplicate_wb_id_offset = self.supernetwork_parameters.get("duplicate_wb_id_offset", 9.99e11)
if duplicate_wb_segments:
# update the values of the duplicate segment IDs
fix_idx = self.dataframe.key.isin(set(duplicate_wb_segments))
self._dataframe.loc[fix_idx,"key"] = (self.dataframe[fix_idx].key + duplicate_wb_id_offset).astype("int64")

# make the flowpath linkage, ignore the terminal nexus
self._flowpath_dict = dict(zip(self.dataframe.loc[mask].downstream, self.dataframe.loc[mask].key))

self._dataframe.set_index("key", inplace=True)
self._dataframe = self.dataframe.sort_index()

# ********** need to be included in flowpath_attributes *************
if 'alt' not in self.dataframe.columns:
self._dataframe['alt'] = 1.0 #FIXME get the right value for this...

# Drop 'gages' column if it is present
if 'gages' in self.dataframe:
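The reordering above means the column rename now happens before the ID handling, so the prefix mask and the duplicate-ID offset key off 'key'/'downstream' rather than 'id'/'toid', and the intersection guard keeps column selection from failing when a configured column is absent from the file. The rename itself inverts the configured {standard name: file name} mapping; a sketch of what reverse_dict is assumed to do:

import pandas as pd

def reverse_dict(d):
    # assumed behavior of troute.nhd_network.reverse_dict: flip key/value pairs
    return {v: k for k, v in d.items()}

cols = {'key': 'id', 'downstream': 'toid', 'bw': 'BtmWdth'}   # standard -> file name
df = pd.DataFrame({'id': ['wb-1'], 'toid': ['nex-2'], 'BtmWdth': [10.0]})
df = df.rename(columns=reverse_dict(cols))
assert list(df.columns) == ['key', 'downstream', 'bw']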
@@ -559,10 +567,9 @@ def preprocess_data_assimilation(self, network):
if usgs_da:
self._waterbody_types_df.loc[self._usgs_lake_gage_crosswalk.index,'reservoir_type'] = 2
if rfc_da:
#FIXME: Temporary fix, read in predefined rfc lake gage crosswalk file for rfc reservoirs.
#FIXME: Temporary fix, load predefined rfc lake gage crosswalk info for rfc reservoirs.
# Replace relevant waterbody_types as type 4.
temp_rfc_file = Path(__file__).parent / 'rfc_lake_gage_crosswalk.csv'
rfc_lake_gage_crosswalk = pd.read_csv(temp_rfc_file)
rfc_lake_gage_crosswalk = get_rfc_lake_gage_crosswalk().reset_index()
self._rfc_lake_gage_crosswalk = rfc_lake_gage_crosswalk[rfc_lake_gage_crosswalk['rfc_lake_id'].isin(self.waterbody_dataframe.index)].set_index('rfc_lake_id')
self._waterbody_types_df.loc[self._rfc_lake_gage_crosswalk.index,'reservoir_type'] = 4
else:
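The data-assimilation change swaps the hard-coded CSV read for the shared helper imported at the top of the file. A usage sketch (any columns beyond 'rfc_lake_id' are assumed here, not confirmed by the diff):

from troute.rfc_lake_gage_crosswalk import get_rfc_lake_gage_crosswalk

# reset_index() exposes 'rfc_lake_id' as a column so it can be filtered
# against the network's waterbody ids and re-indexed, as the code above does.
xwalk = get_rfc_lake_gage_crosswalk().reset_index()
subset = xwalk[xwalk['rfc_lake_id'].isin([347987, 1074])]  # placeholder lake ids
print(subset.set_index('rfc_lake_id'))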
