diff --git a/.gitignore b/.gitignore index bac4fbf326..94d5bd9376 100644 --- a/.gitignore +++ b/.gitignore @@ -189,6 +189,7 @@ stacktest obs_rwtest test_quad_irreg_interp test_quad_reg_interp +test_table_read test_ran_unif # Directories to NOT IGNORE ... same as executable names diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 137a4c0063..1b2f991b34 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -22,6 +22,60 @@ individual files. The changes are now listed with the most recent at the top. +**January 11 2024 :: QCEFF. Tag v11.0.0** + +Nonlinear and Non-Gaussian Data Assimilation Capabilities in DART + +- Adds a Quantile-Conserving Ensemble Filtering Framework (QCEFF) to DART. + Publications: `QCEFF part1 `_, + `QCEFF part 2 `_. + +- The default QCEFF options are EAKF, normal distribution (no bounds). + +- User interface changes: + + - filter_kind is now a per-qty option through QCEFF table. + + - Two new required namelists (add to input.nml files): + + - probit_transform_nml + - algorithm_info_nml + + - assim_tools_mod namelist: + + - sort_obs_inc namelist option applied to ENKF only, so default is now .true. + - ``spread_restoration`` is not supported in this version + + - algorithm_info_mod QCEFF options read at runtime from .csv or .txt file + + +- New probability distribution modules: + + - beta_distribution_mod *contributed by Chris Riedel* + - bnrh_distribution_mod (bounded normal rank histogram) + - gamma_distribution_mod + - normal_distribution_mod + + - probit_transform_mod + - distribution_params_mod + +- Update to lorenz_96_tracer_advection: + + - positive_tracer + - more tracer namelist options available and changed defaults + - updated perturbation routine + - bug-fix: real(r8) rather than real(i8) + +- Fix: obs_def_1d_state_mod (oned forward operators): + + - For non-integer powers, fix up values for negative bases + +- Documentation: + + - main page section on Nonlinear and Non-Gaussian Data Assimilation Capabilities in DART + - QCEFF instructions: Quantile-Conserving Ensemble Filter Framework + - Example to work through: QCEFF: Examples with the Lorenz 96 Tracer Model + **January 9 2024 :: Derecho CLM-DART. Tag v10.10.1** - CLM-DART scripting updated for Derecho. @@ -74,13 +128,26 @@ bug-fixes: bug-fixes: - filter_mod.dopperlerfold in sync with filter_mod -- unnecessary loop removed from Mersenne twister developer test +- unnecessary loop removed from Mersenne twister developer test doc-fixes: - rename assim_model_mod.rst to match the module -- fix various Sphinx warnings and broken link - +- fix various Sphinx warnings and broken link + +**November 2 2023 :: QCEFF Input Table. Tag v11.1.0-alpha** + +- The QCEFF input table allows for the specification of QCEFF/probit + input options, per QTY, at runtime. +- This replaces the functionality of using an algorithm_info_mod specific + to the model, which meant editing algorithm_info_mod.f90 to specify + which distribution should be used for which quantity. +- The algorithm_info_mod files for the lorenz_96_tracer_advection model + examples have been replaced with set QCF tables (all_bnrhf_qcf_table.csv, + all_eakf_qcf_table.csv, state_eakf_tracer_bnrhf_qcf_table.csv, + neg_qcf_table.csv) and can be found in lorenz_96_tracer_advection/work. +- Removed the ‘global’ version of filter_kind from assim_tools_mod.f90 + and the &assim_tools_nml **October 5 2023 :: WRF-DART tutorial diagnostic section. Tag v10.8.5** diff --git a/assimilation_code/modules/assimilation/adaptive_inflate_mod.f90 b/assimilation_code/modules/assimilation/adaptive_inflate_mod.f90 index 41166b7ef5..fabb823100 100644 --- a/assimilation_code/modules/assimilation/adaptive_inflate_mod.f90 +++ b/assimilation_code/modules/assimilation/adaptive_inflate_mod.f90 @@ -722,8 +722,6 @@ subroutine update_varying_state_space_inflation(inflate, inflate_mean, inflate_s logical, intent(in) :: inflate_only real(r8) :: gamma, ens_var_deflate, r_var, r_mean -real(r8) :: diff_sd, outlier_ratio -logical :: do_adapt_inf_update if(inflate_mean <= 0.0_r8 .or. inflate_sd <= 0.0_r8) return diff --git a/assimilation_code/modules/assimilation/algorithm_info_mod.f90 b/assimilation_code/modules/assimilation/algorithm_info_mod.f90 new file mode 100644 index 0000000000..42d5f0c218 --- /dev/null +++ b/assimilation_code/modules/assimilation/algorithm_info_mod.f90 @@ -0,0 +1,620 @@ +! DART software - Copyright UCAR. This open source software is provided +! by UCAR, "as is", without charge, subject to all terms of use at +! http://www.image.ucar.edu/DAReS/DART/DART_download + +module algorithm_info_mod + +! Provides routines that give information about details of algorithms for +! observation error sampling, observation increments, and the transformations +! for regression and inflation in probit space. + +use types_mod, only : r8, i8, MISSING_R8, obstypelength + +use obs_def_mod, only : obs_def_type, get_obs_def_type_of_obs, get_obs_def_error_variance +use obs_kind_mod, only : get_quantity_for_type_of_obs, get_name_for_quantity, get_index_for_quantity + +use utilities_mod, only : error_handler, E_ERR, E_MSG, open_file, close_file, to_upper, & + do_nml_file, do_nml_term, nmlfileunit, check_namelist_read, & + find_namelist_in_file + +use assim_model_mod, only : get_state_meta_data +use location_mod, only : location_type + +use distribution_params_mod, only : NORMAL_DISTRIBUTION, BOUNDED_NORMAL_RH_DISTRIBUTION, & + GAMMA_DISTRIBUTION, BETA_DISTRIBUTION, LOG_NORMAL_DISTRIBUTION, UNIFORM_DISTRIBUTION, & + PARTICLE_FILTER_DISTRIBUTION + +implicit none +private + +character(len=512) :: errstring +character(len=*), parameter :: source = 'algorithm_info_mod.f90' + +logical :: module_initialized = .false. +logical :: use_qty_defaults = .true. + +! Defining parameters for different observation space filters +integer, parameter :: EAKF = 1 +integer, parameter :: ENKF = 2 +integer, parameter :: KERNEL = 3 +integer, parameter :: OBS_PARTICLE = 4 +integer, parameter :: UNBOUNDED_RHF = 8 +integer, parameter :: GAMMA_FILTER = 11 +integer, parameter :: BOUNDED_NORMAL_RHF = 101 + +public :: obs_error_info, probit_dist_info, obs_inc_info, & + init_algorithm_info_mod, end_algorithm_info_mod, & + EAKF, ENKF, BOUNDED_NORMAL_RHF, UNBOUNDED_RHF, & + GAMMA_FILTER, KERNEL, OBS_PARTICLE + +! type definitions for the QCF table +type obs_error_info_type + logical :: bounded_below, bounded_above + real(r8) :: lower_bound, upper_bound +end type + +type probit_inflation_type + integer :: dist_type + logical :: bounded_below, bounded_above + real(r8) :: lower_bound, upper_bound +end type + +type probit_state_type + integer :: dist_type + logical :: bounded_below, bounded_above + real(r8) :: lower_bound, upper_bound +end type + +type probit_extended_state_type + integer :: dist_type + logical :: bounded_below, bounded_above + real(r8) :: lower_bound, upper_bound +end type + +type obs_inc_info_type + integer :: filter_kind + logical :: bounded_below, bounded_above + real(r8) :: lower_bound, upper_bound +end type + +type algorithm_info_type + type(obs_error_info_type) :: obs_error_info + type(probit_inflation_type) :: probit_inflation + type(probit_state_type) :: probit_state + type(probit_extended_state_type) :: probit_extended_state + type(obs_inc_info_type) :: obs_inc_info +end type + +integer, parameter :: HEADER_LINES = 2 +character(len=129), dimension(4) :: header1 +character(len=129), dimension(25) :: header2 ! Number of table columns plus 1 + +integer, allocatable :: specified_qtys(:) +type(algorithm_info_type), allocatable :: qceff_table_data(:) + +character(len=129), allocatable :: dist_type_string_probit_inflation(:) +character(len=129), allocatable :: dist_type_string_probit_state(:) +character(len=129), allocatable :: dist_type_string_probit_extended_state(:) +character(len=129), allocatable :: filter_kind_string(:) + +! namelist +character(len=129) :: qceff_table_filename = '' + +namelist /algorithm_info_nml/ qceff_table_filename + +contains + +!------------------------------------------------------------------------- + + +subroutine init_algorithm_info_mod() + +! Gets number of lines/QTYs in the QCF table, allocates space for the table data + + +integer :: fileid +integer :: io, iunit + +integer :: numrows +integer :: nlines + +if (module_initialized) return +module_initialized = .true. + +! Read the namelist entry +call find_namelist_in_file("input.nml", "algorithm_info_nml", iunit) +read(iunit, nml = algorithm_info_nml, iostat = io) +call check_namelist_read(iunit, io, "algorithm_info_nml") + +if (do_nml_file()) write(nmlfileunit, nml=algorithm_info_nml) +if (do_nml_term()) write( * , nml=algorithm_info_nml) + + +if (qceff_table_filename == '') then + write(errstring,*) 'No QCF table file listed in namelist, using default values for all QTYs' + call error_handler(E_MSG, 'init_algorithm_info_mod:', errstring, source) + return +endif + +use_qty_defaults = .false. +fileid = open_file(trim(qceff_table_filename), 'formatted', 'read') + +! Do loop to get number of rows (or QTYs) in the table +nlines = 0 +do + read(fileid,*,iostat=io) + if(io/=0) exit + nlines = nlines + 1 +end do + +call close_file(fileid) + +numrows = nlines - HEADER_LINES + +allocate(specified_qtys(numrows)) +allocate(qceff_table_data(numrows)) +allocate(dist_type_string_probit_inflation(numrows)) +allocate(dist_type_string_probit_state(numrows)) +allocate(dist_type_string_probit_extended_state(numrows)) +allocate(filter_kind_string(numrows)) + +call read_qceff_table(qceff_table_filename) +call assert_qceff_table_version() +call verify_qceff_table_data() +call log_qceff_table_data() + +end subroutine init_algorithm_info_mod + +!------------------------------------------------------------------------ + + +subroutine read_qceff_table(qceff_table_filename) + +! Reads in the QCEFF input options from tabular data file + +character(len=129), intent(in) :: qceff_table_filename + +integer :: fileid +integer :: row +character(len=obstypelength) :: qty_string + +if (.not. module_initialized) call init_algorithm_info_mod() + +fileid = open_file(trim(qceff_table_filename), 'formatted', 'read') + +! skip the headers +read(fileid, *) header1 +read(fileid, *) header2 + +! read in table values directly to qceff_table_data type +do row = 1, size(qceff_table_data) + read(fileid, *) qty_string, qceff_table_data(row)%obs_error_info%bounded_below, qceff_table_data(row)%obs_error_info%bounded_above, & + qceff_table_data(row)%obs_error_info%lower_bound, qceff_table_data(row)%obs_error_info%upper_bound, dist_type_string_probit_inflation(row), & + qceff_table_data(row)%probit_inflation%bounded_below, qceff_table_data(row)%probit_inflation%bounded_above, & + qceff_table_data(row)%probit_inflation%lower_bound, qceff_table_data(row)%probit_inflation%upper_bound, dist_type_string_probit_state(row), & + qceff_table_data(row)%probit_state%bounded_below, qceff_table_data(row)%probit_state%bounded_above, & + qceff_table_data(row)%probit_state%lower_bound, qceff_table_data(row)%probit_state%upper_bound, dist_type_string_probit_extended_state(row), & + qceff_table_data(row)%probit_extended_state%bounded_below, qceff_table_data(row)%probit_extended_state%bounded_above, & + qceff_table_data(row)%probit_extended_state%lower_bound, qceff_table_data(row)%probit_extended_state%upper_bound, & + filter_kind_string(row), qceff_table_data(row)%obs_inc_info%bounded_below, qceff_table_data(row)%obs_inc_info%bounded_above, & + qceff_table_data(row)%obs_inc_info%lower_bound, qceff_table_data(row)%obs_inc_info%upper_bound + + call to_upper(qty_string) + specified_qtys(row) = get_index_for_quantity(qty_string) + + if(specified_qtys(row) == -1) then + write(errstring,*) trim(qty_string), ' is not a valid DART QTY' + call error_handler(E_ERR, 'read_qceff_table:', errstring, source) + endif + + ! Converting the distribution types (read in from table as a string) to its corresponding int value + call to_upper(dist_type_string_probit_inflation(row)) + + select case (trim(dist_type_string_probit_inflation(row))) + case ('NORMAL_DISTRIBUTION') + qceff_table_data(row)%probit_inflation%dist_type = NORMAL_DISTRIBUTION + case ('BOUNDED_NORMAL_RH_DISTRIBUTION') + qceff_table_data(row)%probit_inflation%dist_type = BOUNDED_NORMAL_RH_DISTRIBUTION + case ('GAMMA_DISTRIBUTION') + qceff_table_data(row)%probit_inflation%dist_type = GAMMA_DISTRIBUTION + case ('BETA_DISTRIBUTION') + qceff_table_data(row)%probit_inflation%dist_type = BETA_DISTRIBUTION + case ('LOG_NORMAL_DISTRIBUTION') + qceff_table_data(row)%probit_inflation%dist_type = LOG_NORMAL_DISTRIBUTION + case ('UNIFORM_DISTRIBUTION') + qceff_table_data(row)%probit_inflation%dist_type = UNIFORM_DISTRIBUTION + !!!case ('PARTICLE_FILTER_DISTRIBUTION') + !!!qceff_table_data(row)%probit_inflation%dist_type = PARTICLE_FILTER_DISTRIBUTION + case default + write(errstring, *) 'Invalid distribution type for probit inflation: ', trim(dist_type_string_probit_inflation(row)) + call error_handler(E_ERR, 'read_qceff_table:', errstring, source) + end select + + + call to_upper(dist_type_string_probit_state(row)) + + select case (trim(dist_type_string_probit_state(row))) + case ('NORMAL_DISTRIBUTION') + qceff_table_data(row)%probit_state%dist_type = NORMAL_DISTRIBUTION + case ('BOUNDED_NORMAL_RH_DISTRIBUTION') + qceff_table_data(row)%probit_state%dist_type = BOUNDED_NORMAL_RH_DISTRIBUTION + case ('GAMMA_DISTRIBUTION') + qceff_table_data(row)%probit_state%dist_type = GAMMA_DISTRIBUTION + case ('BETA_DISTRIBUTION') + qceff_table_data(row)%probit_state%dist_type = BETA_DISTRIBUTION + case ('LOG_NORMAL_DISTRIBUTION') + qceff_table_data(row)%probit_state%dist_type = LOG_NORMAL_DISTRIBUTION + case ('UNIFORM_DISTRIBUTION') + qceff_table_data(row)%probit_state%dist_type = UNIFORM_DISTRIBUTION + !!!case ('PARTICLE_FILTER_DISTRIBUTION') + !!!qceff_table_data(row)%probit_state%dist_type = PARTICLE_FILTER_DISTRIBUTION + case default + write(errstring, *) 'Invalid distribution type for probit state: ', trim(dist_type_string_probit_state(row)) + call error_handler(E_ERR, 'read_qceff_table:', errstring, source) + end select + + call to_upper(dist_type_string_probit_extended_state(row)) + + select case (trim(dist_type_string_probit_extended_state(row))) + case ('NORMAL_DISTRIBUTION') + qceff_table_data(row)%probit_extended_state%dist_type = NORMAL_DISTRIBUTION + case ('BOUNDED_NORMAL_RH_DISTRIBUTION') + qceff_table_data(row)%probit_extended_state%dist_type = BOUNDED_NORMAL_RH_DISTRIBUTION + case ('GAMMA_DISTRIBUTION') + qceff_table_data(row)%probit_extended_state%dist_type = GAMMA_DISTRIBUTION + case ('BETA_DISTRIBUTION') + qceff_table_data(row)%probit_extended_state%dist_type = BETA_DISTRIBUTION + case ('LOG_NORMAL_DISTRIBUTION') + qceff_table_data(row)%probit_extended_state%dist_type = LOG_NORMAL_DISTRIBUTION + case ('UNIFORM_DISTRIBUTION') + qceff_table_data(row)%probit_extended_state%dist_type = UNIFORM_DISTRIBUTION + !!!case ('PARTICLE_FILTER_DISTRIBUTION') + !!!qceff_table_data(row)%probit_extended_state%dist_type = PARTICLE_FILTER_DISTRIBUTION + case default + write(errstring, *) 'Invalid distribution type for probit extended state: ', trim(dist_type_string_probit_extended_state(row)) + call error_handler(E_ERR, 'read_qceff_table:', errstring, source) + end select + + + ! Converting the filter kind (read in from table as a string) to its corresponding int value + call to_upper(filter_kind_string(row)) + + select case (trim(filter_kind_string(row))) + case ('EAKF') + qceff_table_data(row)%obs_inc_info%filter_kind = EAKF + case ('ENKF') + qceff_table_data(row)%obs_inc_info%filter_kind = ENKF + case ('UNBOUNDED_RHF') + qceff_table_data(row)%obs_inc_info%filter_kind = UNBOUNDED_RHF + case ('GAMMA_FILTER') + qceff_table_data(row)%obs_inc_info%filter_kind = GAMMA_FILTER + case ('BOUNDED_NORMAL_RHF') + qceff_table_data(row)%obs_inc_info%filter_kind = BOUNDED_NORMAL_RHF + case default + write(errstring, *) 'Invalid filter kind: ', trim(filter_kind_string(row)) + call error_handler(E_ERR, 'read_qceff_table:', errstring, source) + end select + +end do + +call close_file(fileid) + +end subroutine read_qceff_table + +!------------------------------------------------------------------------ + + +subroutine obs_error_info(obs_def, error_variance, & + bounded_below, bounded_above, lower_bound, upper_bound) + +! Computes information needed to compute error sample for this observation +! This is called by perfect_model_obs when generating noisy obs +type(obs_def_type), intent(in) :: obs_def +real(r8), intent(out) :: error_variance +logical, intent(out) :: bounded_below, bounded_above +real(r8), intent(out) :: lower_bound, upper_bound + +integer :: obs_type, obs_qty +integer(i8) :: state_var_index +type(location_type) :: temp_loc + +integer :: QTY_loc(1) + +if (.not. module_initialized) call init_algorithm_info_mod() + +! Get the type of the observation +obs_type = get_obs_def_type_of_obs(obs_def) +! If it is negative, it is an identity obs +if(obs_type < 0) then + state_var_index = -1 * obs_type + call get_state_meta_data(state_var_index, temp_loc, obs_qty) +else + obs_qty = get_quantity_for_type_of_obs(obs_type) +endif + +! Get the default error variance +error_variance = get_obs_def_error_variance(obs_def) + +!use default values if qceff_table_filename is not in namelist +if (use_qty_defaults) then + bounded_below = .false.; bounded_above = .false. + lower_bound = MISSING_R8; upper_bound = MISSING_R8 + return +endif + +!find location of QTY in qceff_table_data structure +QTY_loc = findloc(specified_qtys, obs_qty) + +if (QTY_loc(1) == 0) then + !use default values if QTY is not in table + bounded_below = .false.; bounded_above = .false. + lower_bound = MISSING_R8; upper_bound = MISSING_R8 + + else + bounded_below = qceff_table_data(QTY_loc(1))%obs_error_info%bounded_below + bounded_above = qceff_table_data(QTY_loc(1))%obs_error_info%bounded_above + lower_bound = qceff_table_data(QTY_loc(1))%obs_error_info%lower_bound + upper_bound = qceff_table_data(QTY_loc(1))%obs_error_info%upper_bound + +endif + +end subroutine obs_error_info + +!------------------------------------------------------------------------- + + +subroutine probit_dist_info(qty, is_state, is_inflation, dist_type, & + bounded_below, bounded_above, lower_bound, upper_bound) + +integer, intent(in) :: qty +logical, intent(in) :: is_state ! True for state variable, false for obs +logical, intent(in) :: is_inflation ! True for inflation transform +integer, intent(out) :: dist_type +logical, intent(out) :: bounded_below, bounded_above +real(r8), intent(out) :: lower_bound, upper_bound + +integer :: QTY_loc(1) + +! Have input information about the qty of the state or observation being transformed +! along with additional logical info that indicates whether this is an observation +! or state variable and about whether the transformation is being done for inflation +! or for regress. + +! Selects the appropriate transform, which is specified in the QCF input table per QTY. +! At present, the options are NORMAL_DISTRIBUTION, BOUNDED_NORMAL_RH_DISTRIBUTION, +! GAMMA_DISTRIBUTION, BETA_DISTRIBUTION, LOG_NORMAL_DISTRIBUTION, +! UNIFORM_DISTRIBUTION +! If the BNRH is selected then information about the bounds must also be set. +! For example, if qty corresponds to a sea ice fraction then an appropriate choice +! would be: +! bounded_below = .true.; bounded_above = .true. +! lower_bound = 0.0_r8; upper_bounds = 1.0_r8 + +! In the long run, may not have to have separate controls for each of the input possibilities +! However, for now these are things that need to be explored for science understanding + +if (.not. module_initialized) call init_algorithm_info_mod() + +!use default values if qceff_table_filename is not in namelist +if (use_qty_defaults) then + dist_type = NORMAL_DISTRIBUTION + bounded_below = .false.; bounded_above = .false. + lower_bound = MISSING_R8; upper_bound = MISSING_R8 + return +endif + +QTY_loc = findloc(specified_qtys, qty) + +if (QTY_loc(1) == 0) then + !use default values if QTY is not in table + dist_type = NORMAL_DISTRIBUTION + bounded_below = .false.; bounded_above = .false. + lower_bound = MISSING_R8; upper_bound = MISSING_R8 + + elseif(is_inflation) then + ! Case for inflation transformation + + dist_type = qceff_table_data(QTY_loc(1))%probit_inflation%dist_type + bounded_below = qceff_table_data(QTY_loc(1))%probit_inflation%bounded_below + bounded_above = qceff_table_data(QTY_loc(1))%probit_inflation%bounded_above + lower_bound = qceff_table_data(QTY_loc(1))%probit_inflation%lower_bound + upper_bound = qceff_table_data(QTY_loc(1))%probit_inflation%upper_bound + + elseif(is_state) then + ! Case for state variable priors + + dist_type = qceff_table_data(QTY_loc(1))%probit_state%dist_type + bounded_below = qceff_table_data(QTY_loc(1))%probit_state%bounded_below + bounded_above = qceff_table_data(QTY_loc(1))%probit_state%bounded_above + lower_bound = qceff_table_data(QTY_loc(1))%probit_state%lower_bound + upper_bound = qceff_table_data(QTY_loc(1))%probit_state%upper_bound + + else + ! This case is for observation (extended state) priors + + dist_type = qceff_table_data(QTY_loc(1))%probit_extended_state%dist_type + bounded_below = qceff_table_data(QTY_loc(1))%probit_extended_state%bounded_below + bounded_above = qceff_table_data(QTY_loc(1))%probit_extended_state%bounded_above + lower_bound = qceff_table_data(QTY_loc(1))%probit_extended_state%lower_bound + upper_bound = qceff_table_data(QTY_loc(1))%probit_extended_state%upper_bound + +endif + +end subroutine probit_dist_info + +!------------------------------------------------------------------------ + + +subroutine obs_inc_info(obs_qty, filter_kind, bounded_below, bounded_above, & + lower_bound, upper_bound) + +integer, intent(in) :: obs_qty +integer, intent(out) :: filter_kind +logical, intent(out) :: bounded_below, bounded_above +real(r8), intent(out) :: lower_bound, upper_bound + +integer :: QTY_loc(1) + +if (.not. module_initialized) call init_algorithm_info_mod() + +!use default values if qceff_table_filename is not in namelist +if (use_qty_defaults) then + filter_kind = EAKF + bounded_below = .false.; bounded_above = .false. + lower_bound = MISSING_R8; upper_bound = MISSING_R8 + return +endif + +!find location of QTY in qceff_table_data structure +QTY_loc = findloc(specified_qtys, obs_qty) + +if (QTY_loc(1) == 0) then + !use default values if QTY is not in table + filter_kind = EAKF + bounded_below = .false.; bounded_above = .false. + lower_bound = MISSING_R8; upper_bound = MISSING_R8 + + else + + filter_kind = qceff_table_data(QTY_loc(1))%obs_inc_info%filter_kind + bounded_below = qceff_table_data(QTY_loc(1))%obs_inc_info%bounded_below + bounded_above = qceff_table_data(QTY_loc(1))%obs_inc_info%bounded_above + lower_bound = qceff_table_data(QTY_loc(1))%obs_inc_info%lower_bound + upper_bound = qceff_table_data(QTY_loc(1))%obs_inc_info%upper_bound + +endif + +end subroutine obs_inc_info + +!------------------------------------------------------------------------ + + +subroutine assert_qceff_table_version() + +! Subroutine to ensure the correct version of the QCF table is being used + +character(1), parameter :: QCF_VERSION = '1' + +if (trim(header1(4)) /= QCF_VERSION) then + write(errstring,*) 'Using outdated/incorrect version of the QCF table' + call error_handler(E_ERR, 'assert_qceff_table_version:', errstring, source) +endif + +end subroutine assert_qceff_table_version + +!------------------------------------------------------------------------ + + +subroutine verify_qceff_table_data() + +! Subroutine to ensure that the data in the QCF table is valid + +integer :: row + +if (use_qty_defaults) return + +!Checks that all bounds are valid; currently checks that the lower bound in less than the upper +!Here we could add more specific checks if we have known limits on the bounds +do row = 1, size(qceff_table_data) + + if (qceff_table_data(row)%obs_error_info%bounded_below .and. qceff_table_data(row)%obs_error_info%bounded_above) then + if(qceff_table_data(row)%obs_error_info%lower_bound > qceff_table_data(row)%obs_error_info%upper_bound) then + write(errstring,*) 'Invalid bounds in obs_error_info' + call error_handler(E_ERR, 'verify_qceff_table_data:', errstring, source) + endif + endif + if (qceff_table_data(row)%probit_inflation%bounded_below .and. qceff_table_data(row)%probit_inflation%bounded_above) then + if(qceff_table_data(row)%probit_inflation%lower_bound > qceff_table_data(row)%probit_inflation%upper_bound) then + write(errstring,*) 'Invalid bounds in probit_inflation' + call error_handler(E_ERR, 'verify_qceff_table_data:', errstring, source) + endif + endif + if(qceff_table_data(row)%probit_state%bounded_below .and. qceff_table_data(row)%probit_state%bounded_above) then + if(qceff_table_data(row)%probit_state%lower_bound > qceff_table_data(row)%probit_state%upper_bound) then + write(errstring,*) 'Invalid bounds in probit_state' + call error_handler(E_ERR, 'verify_qceff_table_data:', errstring, source) + endif + endif + if(qceff_table_data(row)%probit_extended_state%bounded_below .and. qceff_table_data(row)%probit_extended_state%bounded_above) then + if(qceff_table_data(row)%probit_extended_state%lower_bound > qceff_table_data(row)%probit_extended_state%upper_bound) then + write(errstring,*) 'Invalid bounds in probit_extended_state' + call error_handler(E_ERR, 'verify_qceff_table_data:', errstring, source) + endif + endif + if(qceff_table_data(row)%obs_inc_info%bounded_below .and. qceff_table_data(row)%obs_inc_info%bounded_above) then + if(qceff_table_data(row)%obs_inc_info%lower_bound > qceff_table_data(row)%obs_inc_info%upper_bound) then + write(errstring,*) 'Invalid bounds in obs_inc_info' + call error_handler(E_ERR, 'verify_qceff_table_data:', errstring, source) + endif + endif +end do + + +!Ensures that there are no duplicate QTYs in the table +do row = 1, size(qceff_table_data) + if(count(specified_qtys==specified_qtys(row)) > 1) then + write(errstring,*) trim(get_name_for_quantity(specified_qtys(row))), ' has multiple entries in the table' + call error_handler(E_ERR, 'verify_qceff_table_data:', errstring, source) + endif +end do + +end subroutine verify_qceff_table_data + +!------------------------------------------------------------------------ + + +subroutine log_qceff_table_data() + +! Subroutine to write the data in QCF table to dart_log +character(len=2000) :: log_msg +integer :: row + +if (use_qty_defaults) return + +call error_handler(E_MSG, '', '', source) !Writing blank line to log +call error_handler(E_MSG, 'log_qceff_table_data:', 'Logging the data in the QCEFF Table', source) + +! Write the table headers to the dart_log and terminal +write(log_msg, '(2A6, A9, A)') header1(:) +call error_handler(E_MSG, 'log_qceff_table_data:', trim(log_msg), source) + +write(log_msg,'(A10, 2A14, 2A12, 3(A10, 2A14, 2A12), A12, 2A14, 2A12)') header2(:) +call error_handler(E_MSG, 'log_qceff_table_data:', trim(log_msg), source) + +! Write the table data to the dart_log and terminal +do row = 1, size(qceff_table_data) + write(log_msg, *) trim(get_name_for_quantity(specified_qtys(row))), qceff_table_data(row)%obs_error_info%bounded_below, qceff_table_data(row)%obs_error_info%bounded_above, & + qceff_table_data(row)%obs_error_info%lower_bound, qceff_table_data(row)%obs_error_info%upper_bound, trim(dist_type_string_probit_inflation(row)), & + qceff_table_data(row)%probit_inflation%bounded_below, qceff_table_data(row)%probit_inflation%bounded_above, & + qceff_table_data(row)%probit_inflation%lower_bound, qceff_table_data(row)%probit_inflation%upper_bound, trim(dist_type_string_probit_state(row)), & + qceff_table_data(row)%probit_state%bounded_below, qceff_table_data(row)%probit_state%bounded_above, & + qceff_table_data(row)%probit_state%lower_bound, qceff_table_data(row)%probit_state%upper_bound, trim(dist_type_string_probit_extended_state(row)), & + qceff_table_data(row)%probit_extended_state%bounded_below, qceff_table_data(row)%probit_extended_state%bounded_above, & + qceff_table_data(row)%probit_extended_state%lower_bound, qceff_table_data(row)%probit_extended_state%upper_bound, & + trim(filter_kind_string(row)), qceff_table_data(row)%obs_inc_info%bounded_below, qceff_table_data(row)%obs_inc_info%bounded_above, & + qceff_table_data(row)%obs_inc_info%lower_bound, qceff_table_data(row)%obs_inc_info%upper_bound +call error_handler(E_MSG, 'log_qceff_table_data:', trim(log_msg), source) +end do + +call error_handler(E_MSG, '', '', source) !Writing blank line to log + +end subroutine log_qceff_table_data + +!------------------------------------------------------------------------ + + +subroutine end_algorithm_info_mod() + +if (.not. module_initialized) return +module_initialized = .false. + +if (use_qty_defaults) return + +deallocate(specified_qtys) +deallocate(qceff_table_data) + +end subroutine end_algorithm_info_mod + +!---------------------------------------------------------------------- + +end module algorithm_info_mod diff --git a/assimilation_code/modules/assimilation/algorithm_info_mod.nml b/assimilation_code/modules/assimilation/algorithm_info_mod.nml new file mode 100644 index 0000000000..730cc67423 --- /dev/null +++ b/assimilation_code/modules/assimilation/algorithm_info_mod.nml @@ -0,0 +1,3 @@ +&algorithm_info_mod + qceff_table_filename = '' +/ diff --git a/assimilation_code/modules/assimilation/assim_tools_mod.f90 b/assimilation_code/modules/assimilation/assim_tools_mod.f90 index 99b636c753..d1b6e2bbfc 100644 --- a/assimilation_code/modules/assimilation/assim_tools_mod.f90 +++ b/assimilation_code/modules/assimilation/assim_tools_mod.f90 @@ -8,7 +8,7 @@ module assim_tools_mod !> \defgroup assim_tools assim_tools_mod !> !> @{ -use types_mod, only : r8, i8, digits12, PI, missing_r8 +use types_mod, only : r8, i8, PI, missing_r8 use options_mod, only : get_missing_ok_status @@ -16,7 +16,7 @@ module assim_tools_mod find_namelist_in_file, error_handler, & E_ERR, E_MSG, nmlfileunit, do_nml_file, do_nml_term, & open_file, close_file, timestamp -use sort_mod, only : index_sort +use sort_mod, only : index_sort use random_seq_mod, only : random_seq_type, random_gaussian, init_random_seq, & random_uniform @@ -71,6 +71,23 @@ module assim_tools_mod use quality_control_mod, only : good_dart_qc, DARTQC_FAILED_VERT_CONVERT +use probit_transform_mod, only : transform_to_probit, transform_from_probit, & + transform_all_from_probit + +use normal_distribution_mod, only : normal_cdf, inv_weighted_normal_cdf + +use algorithm_info_mod, only : probit_dist_info, obs_inc_info, EAKF, ENKF, & + BOUNDED_NORMAL_RHF, UNBOUNDED_RHF, GAMMA_FILTER, & + KERNEL, OBS_PARTICLE + +use gamma_distribution_mod, only : gamma_cdf, inv_gamma_cdf, gamma_mn_var_to_shape_scale, & + gamma_gamma_prod + +use bnrh_distribution_mod, only : inv_bnrh_cdf, bnrh_cdf, inv_bnrh_cdf_like + +use distribution_params_mod, only : distribution_params_type, deallocate_distribution_params + + implicit none private @@ -114,21 +131,8 @@ module assim_tools_mod !---- namelist with default values -! Filter kind selects type of observation space filter -! 1 = EAKF filter -! 2 = ENKF -! 3 = Kernel filter -! 4 = particle filter -! 5 = random draw from posterior -! 6 = deterministic draw from posterior with fixed kurtosis -! 8 = Rank Histogram Filter (see Anderson 2011) -! -! special_localization_obs_types -> Special treatment for the specified observation types -! special_localization_cutoffs -> Different cutoff value for each specified obs type -! -integer :: filter_kind = 1 real(r8) :: cutoff = 0.2_r8 -logical :: sort_obs_inc = .false. +logical :: sort_obs_inc = .true. logical :: spread_restoration = .false. logical :: sampling_error_correction = .false. integer :: adaptive_localization_threshold = -1 @@ -143,7 +147,7 @@ module assim_tools_mod logical :: output_localization_diagnostics = .false. character(len = 129) :: localization_diagnostics_file = "localization_diagnostics" -! Following only relevant for filter_kind = 8 +! Following only relevant for filter_kind = UNBOUNDED_RHF logical :: rectangular_quadrature = .true. logical :: gaussian_likelihood_tails = .false. @@ -185,7 +189,7 @@ module assim_tools_mod ! compared to previous versions of this namelist item. logical :: distribute_mean = .false. -namelist / assim_tools_nml / filter_kind, cutoff, sort_obs_inc, & +namelist / assim_tools_nml / cutoff, sort_obs_inc, & spread_restoration, sampling_error_correction, & adaptive_localization_threshold, adaptive_cutoff_floor, & print_every_nth_obs, rectangular_quadrature, gaussian_likelihood_tails, & @@ -230,9 +234,9 @@ subroutine assim_tools_init() ! Note null_win_mod.f90 ignores distibute_mean. if (task_count() == 1) distribute_mean = .true. -! FOR NOW, can only do spread restoration with filter option 1 (need to extend this) -if(spread_restoration .and. .not. filter_kind == 1) then - write(msgstring, *) 'cannot combine spread_restoration and filter_kind ', filter_kind +if(spread_restoration) then + write(msgstring, *) 'The spread_restoration option is not supported in this version of ', & + 'DART. Contact the DAReS team if this option is needed ' call error_handler(E_ERR,'assim_tools_init:', msgstring, source) endif @@ -322,7 +326,8 @@ subroutine filter_assim(ens_handle, obs_ens_handle, obs_seq, keys, & ! changed the ensemble sized things here to allocatable -real(r8) :: obs_prior(ens_size), obs_inc(ens_size), updated_ens(ens_size) +real(r8) :: obs_prior(ens_size), obs_inc(ens_size) +real(r8) :: obs_post(ens_size), probit_obs_prior(ens_size), probit_obs_post(ens_size) real(r8) :: final_factor real(r8) :: net_a(num_groups), correl(num_groups) real(r8) :: obs(1), obs_err_var, my_inflate, my_inflate_sd @@ -370,6 +375,15 @@ subroutine filter_assim(ens_handle, obs_ens_handle, obs_seq, keys, & logical :: local_ss_inflate logical :: local_obs_inflate +! Storage for normal probit conversion, keeps prior mean and sd for all state ensemble members +type(distribution_params_type) :: state_dist_params(ens_handle%my_num_vars) +type(distribution_params_type) :: obs_dist_params(obs_ens_handle%my_num_vars) +integer :: dist_for_state, dist_for_obs +type(distribution_params_type) :: temp_dist_params +logical :: bounded_below, bounded_above +real(r8) :: lower_bound, upper_bound +real(r8) :: probit_ens(ens_size) + ! allocate rather than dump all this on the stack allocate(close_obs_dist( obs_ens_handle%my_num_vars), & close_obs_ind( obs_ens_handle%my_num_vars), & @@ -394,22 +408,6 @@ subroutine filter_assim(ens_handle, obs_ens_handle, obs_seq, keys, & ! Need to give create_mean_window the mean copy call create_mean_window(ens_handle, ENS_MEAN_COPY, distribute_mean) -! filter kinds 1 and 8 return sorted increments, however non-deterministic -! inflation can scramble these. the sort is expensive, so help users get better -! performance by rejecting namelist combinations that do unneeded work. -if (sort_obs_inc) then - if(deterministic_inflate(inflate) .and. ((filter_kind == 1) .or. (filter_kind == 8))) then - write(msgstring, *) 'With a deterministic filter [assim_tools_nml:filter_kind = ',filter_kind,']' - write(msgstring2, *) 'and deterministic inflation [filter_nml:inf_deterministic = .TRUE.]' - write(msgstring3, *) 'assim_tools_nml:sort_obs_inc = .TRUE. is not needed and is expensive.' - call error_handler(E_MSG,'', '') ! whitespace - call error_handler(E_MSG,'WARNING filter_assim:', msgstring, source, & - text2=msgstring2,text3=msgstring3) - call error_handler(E_MSG,'', '') ! whitespace - sort_obs_inc = .FALSE. - endif -endif - ! Open the localization diagnostics file if(output_localization_diagnostics .and. my_task_id() == 0) & localization_unit = open_file(localization_diagnostics_file, action = 'append') @@ -493,6 +491,16 @@ subroutine filter_assim(ens_handle, obs_ens_handle, obs_seq, keys, & ! Get the location and kind of all my state variables do i = 1, ens_handle%my_num_vars call get_state_meta_data(my_state_indx(i), my_state_loc(i), my_state_kind(i)) + + ! Need to specify what kind of prior to use for each + call probit_dist_info(my_state_kind(i), .true., .false., dist_for_state, & + bounded_below, bounded_above, lower_bound, upper_bound) + + ! Convert all my state variables to appropriate probit space + call transform_to_probit(ens_size, ens_handle%copies(1:ens_size, i), dist_for_state, & + state_dist_params(i), probit_ens, .false., & + bounded_below, bounded_above, lower_bound, upper_bound) + ens_handle%copies(1:ens_size, i) = probit_ens end do !> optionally convert all state location verticals @@ -514,6 +522,26 @@ subroutine filter_assim(ens_handle, obs_ens_handle, obs_seq, keys, & end do endif +! Have gotten the mean and variance from original ensembles, can convert all my obs to probit +! CAN WE DO THE ADAPTIVE INFLATION ENTIRELY IN PROBIT SPACE TO MAKE IT DISTRIBUTION INDEPENDENT???? +! WOULD NEED AN OBSERVATION ERROR VARIANCE IN PROBIT SPACE SOMEHOW. IS THAT POSSIBLE??? + +do i = 1, my_num_obs + obs_qc = obs_ens_handle%copies(OBS_GLOBAL_QC_COPY, i) + ! Only do conversion of qc if forward operator is good + if(nint(obs_qc) == 0) then + ! Need to specify what kind of prior to use for each + call probit_dist_info(my_obs_kind(i), .false., .false., dist_for_obs, & + bounded_below, bounded_above, lower_bound, upper_bound) + + ! Convert all my obs (extended state) variables to appropriate probit space + call transform_to_probit(ens_size, obs_ens_handle%copies(1:ens_size, i), dist_for_obs, & + obs_dist_params(i), probit_ens, .false., & + bounded_below, bounded_above, lower_bound, upper_bound) + obs_ens_handle%copies(1:ens_size, i) = probit_ens + endif +end do + ! Initialize the method for getting state variables close to a given ob on my process if (has_special_cutoffs) then call get_close_init(gc_state, my_num_state, 2.0_r8*cutoff, my_state_loc, 2.0_r8*cutoff_list) @@ -597,12 +625,17 @@ subroutine filter_assim(ens_handle, obs_ens_handle, obs_seq, keys, & ! Only value of 0 for DART QC field should be assimilated IF_QC_IS_OKAY: if(nint(obs_qc) ==0) then - obs_prior = obs_ens_handle%copies(1:ens_size, owners_index) ! Note that these are before DA starts, so can be different from current obs_prior orig_obs_prior_mean = obs_ens_handle%copies(OBS_PRIOR_MEAN_START: & OBS_PRIOR_MEAN_END, owners_index) orig_obs_prior_var = obs_ens_handle%copies(OBS_PRIOR_VAR_START: & OBS_PRIOR_VAR_END, owners_index) + + ! If QC is okay, convert this observation ensemble from probit to regular space + call transform_from_probit(ens_size, obs_ens_handle%copies(1:ens_size, owners_index) , & + obs_dist_params(owners_index), obs_ens_handle%copies(1:ens_size, owners_index)) + + obs_prior = obs_ens_handle%copies(1:ens_size, owners_index) endif IF_QC_IS_OKAY !Broadcast the info from this obs to all other processes @@ -640,8 +673,39 @@ subroutine filter_assim(ens_handle, obs_ens_handle, obs_seq, keys, & do group = 1, num_groups grp_bot = grp_beg(group); grp_top = grp_end(group) call obs_increment(obs_prior(grp_bot:grp_top), grp_size, obs(1), & - obs_err_var, obs_inc(grp_bot:grp_top), inflate, my_inflate, & + obs_err_var, base_obs_kind, obs_inc(grp_bot:grp_top), inflate, my_inflate, & my_inflate_sd, net_a(group)) + obs_post(grp_bot:grp_top) = obs_prior(grp_bot:grp_top) + obs_inc(grp_bot:grp_top) + + ! Convert both the prior and posterior to probit space (efficiency for prior???) + ! Running probit space with groups needs to be studied more carefully + ! EFFICIENCY NOTE: FOR RHF, THE OBS_INCREMENT HAS TO DO A SORT + ! THE POSTERIOR WOULD HAVE THE SAME RANK STATISTICS, SO THIS SORT WOULD BE THE SAME + ! THE SECOND CONVERT_TO_PROBIT CAN BE MUCH MORE EFFICIENT USING A SORT + ! SHOULD FIGURE OUT A WAY TO PASS THE SORT ORDER + ! NOTE 2: THIS CONVERSION IS USING THE INFO FROM THE CURRENT (UPDATED) PRIOR ENSEMBLE. THIS + ! IS GENERALLY GOING TO BE A DIFFERENT PROBIT TRANSFORMED ENSEMBLE THAN THE ONE THAT WAS JUST + ! CONVERTED FROM PROBIT SPACE BY THE PROCESS THAT OWNS THIS OBSERVATION. + + ! Need to specify what kind of prior to use for obs being assimilated + call probit_dist_info(base_obs_kind, .false., .false., dist_for_obs, & + bounded_below, bounded_above, lower_bound, upper_bound) + + ! Convert the prior and posterior for this observation to probit space + call transform_to_probit(grp_size, obs_prior(grp_bot:grp_top), dist_for_obs, & + temp_dist_params, probit_obs_prior(grp_bot:grp_top), .false., & + bounded_below, bounded_above, lower_bound, upper_bound) + call transform_to_probit(grp_size, obs_post(grp_bot:grp_top), dist_for_obs, & + temp_dist_params, probit_obs_post(grp_bot:grp_top), .true., & + bounded_below, bounded_above, lower_bound, upper_bound) + ! Free up the storage used for this transform + call deallocate_distribution_params(temp_dist_params) + + ! Copy back into original storage + obs_prior(grp_bot:grp_top) = probit_obs_prior(grp_bot:grp_top) + obs_post(grp_bot:grp_top) = probit_obs_post(grp_bot:grp_top) + ! Recompute obs_inc in probit space + obs_inc(grp_bot:grp_top) = obs_post(grp_bot:grp_top) - obs_prior(grp_bot:grp_top) ! Also compute prior mean and variance of obs for efficiency here obs_prior_mean(group) = sum(obs_prior(grp_bot:grp_top)) / grp_size @@ -750,6 +814,10 @@ subroutine filter_assim(ens_handle, obs_ens_handle, obs_seq, keys, & endif end do SEQUENTIAL_OBS +! Do the inverse probit transform for state variables +call transform_all_from_probit(ens_size, ens_handle%my_num_vars, ens_handle%copies, & + state_dist_params, ens_handle%copies) + ! Every pe needs to get the current my_inflate and my_inflate_sd back if(local_single_ss_inflate) then ens_handle%copies(ENS_INF_COPY, :) = my_inflate @@ -818,7 +886,7 @@ end subroutine filter_assim !------------------------------------------------------------- -subroutine obs_increment(ens_in, ens_size, obs, obs_var, obs_inc, & +subroutine obs_increment(ens_in, ens_size, obs, obs_var, obs_kind, obs_inc, & inflate, my_cov_inflate, my_cov_inflate_sd, net_a) ! Given the ensemble prior for an observation, the observation, and @@ -827,6 +895,7 @@ subroutine obs_increment(ens_in, ens_size, obs, obs_var, obs_inc, & integer, intent(in) :: ens_size real(r8), intent(in) :: ens_in(ens_size), obs, obs_var +integer, intent(in) :: obs_kind real(r8), intent(out) :: obs_inc(ens_size) type(adaptive_inflate_type), intent(inout) :: inflate real(r8), intent(inout) :: my_cov_inflate, my_cov_inflate_sd @@ -838,6 +907,13 @@ subroutine obs_increment(ens_in, ens_size, obs, obs_var, obs_inc, & real(r8) :: rel_weights(ens_size) +integer :: filter_kind +logical :: bounded_below, bounded_above +real(r8) :: lower_bound, upper_bound + +! Declarations for bounded rank histogram filter +real(r8) :: likelihood(ens_size), like_sum + ! Copy the input ensemble to something that can be modified ens = ens_in @@ -868,6 +944,12 @@ subroutine obs_increment(ens_in, ens_size, obs, obs_var, obs_inc, & prior_var = sum((ens - prior_mean)**2) / (ens_size - 1) endif +! Gets information about the increment method (filter_kind, bounds) for the current observation. +call obs_inc_info(obs_kind, filter_kind, bounded_below, bounded_above, & + lower_bound, upper_bound) + +! The first three options in the next if block of code may be inappropriate for +! some more general filters; need to revisit ! If obs_var == 0, delta function. The mean becomes obs value with no spread. ! If prior_var == 0, obs has no effect. The increments are 0. ! If both obs_var and prior_var == 0 there is no right thing to do, so Stop. @@ -897,47 +979,61 @@ subroutine obs_increment(ens_in, ens_size, obs, obs_var, obs_inc, & ! note that at this point we've taken care of the cases where either the ! obs_var or the prior_var is 0, so the individual routines no longer need ! to have code to test for those cases. - if(filter_kind == 1) then + if(filter_kind == EAKF) then call obs_increment_eakf(ens, ens_size, prior_mean, prior_var, & obs, obs_var, obs_inc, net_a) - else if(filter_kind == 2) then + else if(filter_kind == ENKF) then call obs_increment_enkf(ens, ens_size, prior_var, obs, obs_var, obs_inc) - else if(filter_kind == 3) then + else if(filter_kind == KERNEL) then call obs_increment_kernel(ens, ens_size, obs, obs_var, obs_inc) - else if(filter_kind == 4) then + else if(filter_kind == OBS_PARTICLE) then call obs_increment_particle(ens, ens_size, obs, obs_var, obs_inc) - else if(filter_kind == 5) then - call obs_increment_ran_kf(ens, ens_size, prior_mean, prior_var, obs, obs_var, obs_inc) - else if(filter_kind == 6) then - call obs_increment_det_kf(ens, ens_size, prior_mean, prior_var, obs, obs_var, obs_inc) - else if(filter_kind == 7) then - call obs_increment_boxcar(ens, ens_size, obs, obs_var, obs_inc, rel_weights) - else if(filter_kind == 8) then + else if(filter_kind == UNBOUNDED_RHF) then call obs_increment_rank_histogram(ens, ens_size, prior_var, obs, obs_var, obs_inc) + else if(filter_kind == GAMMA_FILTER) then + call obs_increment_gamma(ens, ens_size, prior_mean, prior_var, obs, obs_var, obs_inc) + !-------------------------------------------------------------------------- + else if(filter_kind == BOUNDED_NORMAL_RHF) then + + ! Use bounded normal likelihood; Could use an arbitrary likelihood + do i = 1, ens_size + likelihood(i) = get_truncated_normal_like(ens(i), obs, obs_var, & + bounded_below, bounded_above, lower_bound, upper_bound) + end do + + ! Normalize the likelihood here + like_sum = sum(likelihood) + ! If likelihood underflow, assume flat likelihood, so no increments + if(like_sum <= 0.0_r8) then + obs_inc = 0.0_r8 + return + else + likelihood = likelihood / like_sum + endif + + call obs_increment_bounded_norm_rhf(ens, likelihood, ens_size, prior_var, & + obs_inc, bounded_below, bounded_above, lower_bound, upper_bound) + + ! Do test of inversion for an uninformative likelihood + !!!t_likelihood = 1.0 + !!!t_likelihood = t_likelihood / sum(t_likelihood) + !!!call obs_increment_bounded_norm_rhf(ens, t_likelihood, ens_size, prior_var, & + !!!obs_inc_temp, bounded_below, bounded_above, lower_bound, upper_bound) + !!!if(maxval(abs(obs_inc_temp)) > 1e-3_r8) then + !!!write(msgstring, *) 'Null increment tests exceed the threshold ', maxval(abs(obs_inc_temp)) + !!!call error_handler(E_ERR, 'obs_increment', msgstring, source) + !!!endif + + !-------------------------------------------------------------------------- else call error_handler(E_ERR,'obs_increment', & - 'Illegal value of filter_kind in assim_tools namelist [1-8 OK]', source) + 'Illegal value of filter_kind', source) endif endif ! Add in the extra increments if doing observation space covariance inflation if(do_obs_inflate(inflate)) obs_inc = obs_inc + inflate_inc -! To minimize regression errors, may want to sort to minimize increments -! This makes sense for any of the non-deterministic algorithms -! By doing it here, can take care of both standard non-deterministic updates -! plus non-deterministic obs space covariance inflation. This is expensive, so -! don't use it if it's not needed. -if (sort_obs_inc) then - new_val = ens_in + obs_inc - ! Sorting to make increments as small as possible - call index_sort(ens_in, ens_index, ens_size) - call index_sort(new_val, new_index, ens_size) - do i = 1, ens_size - obs_inc(ens_index(i)) = new_val(new_index(i)) - ens_in(ens_index(i)) - end do -endif - ! Get the net change in spread if obs space inflation was used if(do_obs_inflate(inflate)) net_a = net_a * sqrt(my_cov_inflate) @@ -945,6 +1041,48 @@ end subroutine obs_increment +subroutine obs_increment_gamma(ens, ens_size, prior_mean, prior_var, obs, obs_var, obs_inc) +!======================================================================== +! +! Gamma version of obs increment. This demonstrates the updat + +integer, intent(in) :: ens_size +real(r8), intent(in) :: ens(ens_size), prior_mean, prior_var, obs, obs_var +real(r8), intent(out) :: obs_inc(ens_size) + +real(r8) :: prior_shape, prior_scale, like_shape, like_scale, post_shape, post_scale +real(r8) :: q(ens_size), post(ens_size) +integer :: i + +! Compute the prior quantiles of each ensemble member in the prior gamma distribution +call gamma_mn_var_to_shape_scale(prior_mean, prior_var, prior_shape, prior_scale) +do i = 1, ens_size + q(i) = gamma_cdf(ens(i), prior_shape, prior_scale, .true., .false., 0.0_r8, missing_r8) +end do + +! Compute the statistics of the continous posterior distribution +call gamma_mn_var_to_shape_scale(obs, obs_var, like_shape, like_scale) +call gamma_gamma_prod(prior_shape, prior_scale, like_shape, like_scale, & + post_shape, post_scale) + +! Check for illegal values. This can occur if the distributions are getting too +! concentrated towards the bound +if(post_shape <= 0.0_r8) then + write(msgstring, *) 'Posterior gamma shape is negative ', post_shape + call error_handler(E_ERR, 'obs_increment_gamma', msgstring, source) +endif + +! Now invert the quantiles with the posterior distribution +do i = 1, ens_size + post(i) = inv_gamma_cdf(q(i), post_shape, post_scale, .true., .false., 0.0_r8, missing_r8) +end do + +obs_inc = post - ens + +end subroutine obs_increment_gamma + + + subroutine obs_increment_eakf(ens, ens_size, prior_mean, prior_var, obs, obs_var, obs_inc, a) !======================================================================== ! @@ -968,148 +1106,108 @@ subroutine obs_increment_eakf(ens, ens_size, prior_mean, prior_var, obs, obs_var end subroutine obs_increment_eakf -subroutine obs_increment_ran_kf(ens, ens_size, prior_mean, prior_var, obs, obs_var, obs_inc) -!======================================================================== -! -! Forms a random sample of the Gaussian from the update equations. -! This is very close to what a true 'ENSEMBLE' Kalman Filter would -! look like. Note that outliers, multimodality, etc., get tossed. +subroutine obs_increment_bounded_norm_rhf(ens, ens_like, ens_size, prior_var, & + obs_inc, bounded_below, bounded_above, lower_bound, upper_bound) +!------------------------------------------------------------------------ +integer, intent(in) :: ens_size +real(r8), intent(in) :: ens(ens_size) +real(r8), intent(in) :: ens_like(ens_size) +real(r8), intent(in) :: prior_var +real(r8), intent(out) :: obs_inc(ens_size) +logical, intent(in) :: bounded_below, bounded_above +real(r8), intent(in) :: lower_bound, upper_bound + +! Does bounded RHF assuming that the prior in outer regions is part of a normal. + +real(r8) :: sort_ens(ens_size), sort_ens_like(ens_size) +real(r8) :: post(ens_size), sort_post(ens_size), q(ens_size) +real(r8) :: tail_amp_left, tail_mean_left, tail_sd_left +real(r8) :: tail_amp_right, tail_mean_right, tail_sd_right +logical :: do_uniform_tail_left, do_uniform_tail_right +integer :: i, sort_ind(ens_size) + +! If all ensemble members are identical, this algorithm becomes undefined, so fail +if(prior_var <= 0.0_r8) then + msgstring = 'Ensemble variance <= 0 ' + call error_handler(E_ERR, 'obs_increment_bounded_norm_rhf', msgstring, source) +endif -integer, intent(in) :: ens_size -real(r8), intent(in) :: prior_mean, prior_var -real(r8), intent(in) :: ens(ens_size), obs, obs_var -real(r8), intent(out) :: obs_inc(ens_size) +! Do an index sort of the ensemble members; Use prior info for efficiency in the future +call index_sort(ens, sort_ind, ens_size) -real(r8) :: new_mean, var_ratio -real(r8) :: temp_mean, temp_var, new_ens(ens_size), new_var -integer :: i +! Get the sorted ensemble +sort_ens = ens(sort_ind) -var_ratio = obs_var / (prior_var + obs_var) -new_var = var_ratio * prior_var -new_mean = var_ratio * (prior_mean + prior_var*obs / obs_var) +! Get the sorted likelihood +sort_ens_like = ens_like(sort_ind) -! This will reproduce exactly for multiple runs with the same task count, -! but WILL NOT reproduce for a different number of MPI tasks. -! To make it independent of the number of MPI tasks, it would need to -! use the global ensemble number or something else that remains constant -! as the processor count changes. this is not currently an argument to -! this function and so we are not trying to make it task-count invariant. +! Generate the prior information for a BNRH for this ensemble +call bnrh_cdf(ens, ens_size, bounded_below, bounded_above, lower_bound, upper_bound, & + sort_ens, q, tail_amp_left, tail_mean_left, tail_sd_left, do_uniform_tail_left, & + tail_amp_right, tail_mean_right, tail_sd_right, do_uniform_tail_right) -! Form a random sample from the updated distribution -! Then adjust the mean (what about adjusting the variance?)! -! Definitely need to sort with this; sort is done in main obs_increment -if(first_inc_ran_call) then - call init_random_seq(inc_ran_seq, my_task_id() + 1) - first_inc_ran_call = .false. -endif +! Invert the bnrh cdf after it is multiplied by the likelihood +call inv_bnrh_cdf_like(q, ens_size, sort_ens, & + bounded_below, bounded_above, lower_bound, upper_bound, & + tail_amp_left, tail_mean_left, tail_sd_left, do_uniform_tail_left, & + tail_amp_right, tail_mean_right, tail_sd_right, do_uniform_tail_right, post, & + sort_ens_like) +! The posterior needs to be sorted to get increments; this can be done more efficiently +sort_post = post(sort_ind) +! These are increments for sorted ensemble; convert to increments for unsorted do i = 1, ens_size - new_ens(i) = random_gaussian(inc_ran_seq, new_mean, sqrt(prior_var*var_ratio)) + obs_inc(sort_ind(i)) = sort_post(i) - ens(sort_ind(i)) + ! It may be possible, although apparently exceedingly unusual, to generate an increment + ! here that when added back onto the prior leads to a posterior that is greater than + ! the bounds. Unclear if there is any direct way to fix this given that increments are + ! being passed out. end do -! Adjust the mean of the new ensemble -temp_mean = sum(new_ens) / ens_size -new_ens(:) = new_ens(:) - temp_mean + new_mean +end subroutine obs_increment_bounded_norm_rhf -! Compute prior variance and mean from sample -temp_var = sum((new_ens - new_mean)**2) / (ens_size - 1) -! Adjust the variance, also -new_ens = (new_ens - new_mean) * sqrt(new_var / temp_var) + new_mean - -! Get the increments -obs_inc = new_ens - ens -end subroutine obs_increment_ran_kf - -subroutine obs_increment_det_kf(ens, ens_size, prior_mean, prior_var, obs, obs_var, obs_inc) -!======================================================================== -! -! Does a deterministic ensemble layout for the updated Gaussian. -! Note that all outliers, multimodal behavior, etc. get tossed. - -integer, intent(in) :: ens_size -real(r8), intent(in) :: prior_mean, prior_var -real(r8), intent(in) :: ens(ens_size), obs, obs_var -real(r8), intent(out) :: obs_inc(ens_size) - -real(r8) :: new_mean, var_ratio, temp_var, new_ens(ens_size), new_var -integer :: i - -var_ratio = obs_var / (prior_var + obs_var) -new_var = var_ratio * prior_var -new_mean = var_ratio * (prior_mean + prior_var*obs / obs_var) - -! Want a symmetric distribution with kurtosis 3 and variance new_var and mean new_mean -if(ens_size /= 20) then - write(*, *) 'EXPERIMENTAL version obs_increment_det_kf only works for ens_size 20 now' - stop +! Computes a normal or truncated normal (above and/or below) likelihood. +function get_truncated_normal_like(x, obs, obs_var, & + bounded_below, bounded_above, lower_bound, upper_bound) +!------------------------------------------------------------------------ +real(r8) :: get_truncated_normal_like +real(r8), intent(in) :: x +real(r8), intent(in) :: obs, obs_var +logical, intent(in) :: bounded_below, bounded_above +real(r8), intent(in) :: lower_bound, upper_bound + +real(r8) :: cdf(2), obs_sd, weight + +! A zero observation error variance is a degenerate case +if(obs_var <= 0.0_r8) then + if(x == obs) then + get_truncated_normal_like = 1.0_r8 + else + get_truncated_normal_like = 0.0_r8 + endif + return endif -! This has kurtosis of 3.0, verify again from initial uniform -!new_ens(1) = -2.146750_r8 -!new_ens(2) = -1.601447_r8 -!new_ens(3) = -1.151582_r8 -!new_ens(4) = -0.7898650_r8 -!new_ens(5) = -0.5086292_r8 -!new_ens(6) = -0.2997678_r8 -!new_ens(7) = -0.1546035_r8 -!new_ens(8) = -6.371084E-02_r8 -!new_ens(9) = -1.658448E-02_r8 -!new_ens(10) = -9.175255E-04_r8 - -! This has kurtosis of 3.0, verify again from initial inverse gaussian -!new_ens(1) = -2.188401_r8 -!new_ens(2) = -1.502174_r8 -!new_ens(3) = -1.094422_r8 -!new_ens(4) = -0.8052422_r8 -!new_ens(5) = -0.5840152_r8 -!new_ens(6) = -0.4084518_r8 -!new_ens(7) = -0.2672727_r8 -!new_ens(8) = -0.1547534_r8 -!new_ens(9) = -6.894587E-02_r8 -!new_ens(10) = -1.243549E-02_r8 - -! This has kurtosis of 2.0, verify again -new_ens(1) = -1.789296_r8 -new_ens(2) = -1.523611_r8 -new_ens(3) = -1.271505_r8 -new_ens(4) = -1.033960_r8 -new_ens(5) = -0.8121864_r8 -new_ens(6) = -0.6077276_r8 -new_ens(7) = -0.4226459_r8 -new_ens(8) = -0.2598947_r8 -new_ens(9) = -0.1242189_r8 -new_ens(10) = -2.539018E-02_r8 - -! This has kurtosis of 1.7, verify again -!new_ens(1) = -1.648638_r8 -!new_ens(2) = -1.459415_r8 -!new_ens(3) = -1.272322_r8 -!new_ens(4) = -1.087619_r8 -!new_ens(5) = -0.9056374_r8 -!new_ens(6) = -0.7268229_r8 -!new_ens(7) = -0.5518176_r8 -!new_ens(8) = -0.3816142_r8 -!new_ens(9) = -0.2179997_r8 -!new_ens(10) = -6.538583E-02_r8 -do i = 11, 20 - new_ens(i) = -1.0_r8 * new_ens(20 + 1 - i) -end do +obs_sd = sqrt(obs_var) -! Right now, this ensemble has mean 0 and some variance -! Compute prior variance and mean from sample -temp_var = sum((new_ens)**2) / (ens_size - 1) +! If the truth were at point x, what is the weight of the truncated normal obs error dist? +! If no bounds, the whole cdf is possible +cdf(1) = 0.0_r8 +cdf(2) = 1.0_r8 -! Adjust the variance of this ensemble to match requirements and add in the mean -new_ens = new_ens * sqrt(new_var / temp_var) + new_mean +! Compute the cdf's at the bounds if they exist +if(bounded_below) cdf(1) = normal_cdf(lower_bound, x, obs_sd) +if(bounded_above) cdf(2) = normal_cdf(upper_bound, x, obs_sd) -! Get the increments -obs_inc = new_ens - ens +! The weight is the reciprocal of the fraction of the cdf that is in legal range +weight = 1.0_r8 / (cdf(2) - cdf(1)) -end subroutine obs_increment_det_kf +get_truncated_normal_like = weight * exp(-1.0_r8 * (x - obs)**2 / (2.0_r8 * obs_var)) +end function get_truncated_normal_like @@ -1196,7 +1294,8 @@ subroutine obs_increment_enkf(ens, ens_size, prior_var, obs, obs_var, obs_inc) real(r8) :: obs_var_inv, prior_var_inv, new_var, new_mean(ens_size) ! real(r8) :: sx, s_x2 real(r8) :: temp_mean, temp_obs(ens_size) -integer :: i +real(r8) :: new_val(ens_size) +integer :: i, ens_index(ens_size), new_index(ens_size) ! Compute mt_rinv_y (obs error normalized by variance) obs_var_inv = 1.0_r8 / obs_var @@ -1231,6 +1330,21 @@ subroutine obs_increment_enkf(ens, ens_size, prior_var, obs, obs_var, obs_inc) obs_inc(i) = new_mean(i) - ens(i) end do +! To minimize regression errors, may want to sort to minimize increments +! This makes sense for any of the non-deterministic algorithms +! By doing it here, can take care of both standard non-deterministic updates +! plus non-deterministic obs space covariance inflation. This is expensive, so +! don't use it if it's not needed. +if (sort_obs_inc) then + new_val = ens + obs_inc + ! Sorting to make increments as small as possible + call index_sort(ens, ens_index, ens_size) + call index_sort(new_val, new_index, ens_size) + do i = 1, ens_size + obs_inc(ens_index(i)) = new_val(new_index(i)) - ens(ens_index(i)) + end do +endif + ! Can also adjust mean (and) variance of final sample; works fine !sx = sum(new_mean) !s_x2 = sum(new_mean * new_mean) @@ -1407,41 +1521,6 @@ subroutine update_from_obs_inc(obs, obs_prior_mean, obs_prior_var, obs_inc, & ! Then compute the increment as product of reg_coef and observation space increment state_inc = reg_coef * obs_inc -! -! FIXME: craig schwartz has a degenerate case involving externally computed -! forward operators in which the obs prior variance is in fact exactly 0. -! adding this test allowed him to continue to use spread restoration -! without numerical problems. we don't know if this is sufficient; -! for now we'll leave the original code but it needs to be revisited. -! -! Spread restoration algorithm option. -!if(spread_restoration .and. obs_prior_var > 0.0_r8) then -! - -! Spread restoration algorithm option. -if(spread_restoration) then - ! Don't use this to reduce spread at present (should revisit this line) - net_a = min(net_a_in, 1.0_r8) - - ! Default restoration increment is 0.0 - restoration_inc = 0.0_r8 - - ! Compute the factor by which to inflate - ! These come from correl_error.f90 in system_simulation and the files ens??_pairs and - ! ens_pairs_0.5 in work under system_simulation. Assume a linear reduction from 1 - ! as a function of the net_a. Assume that the slope of this reduction is a function of - ! the reciprocal of the ensemble_size (slope = 0.80 / ens_size). These are empirical - ! for now. See also README in spread_restoration_paper documentation. - !!!factor = 1.0_r8 / (1.0_r8 + (net_a - 1.0_r8) * (0.8_r8 / ens_size)) - 1.0_r8 - factor = 1.0_r8 / (1.0_r8 + (net_a - 1.0_r8) / (-2.4711_r8 + 1.6386_r8 * ens_size)) - 1.0_r8 - !!!factor = 1.0_r8 / (1.0_r8 + (net_a**2 - 1.0_r8) * (-0.0111_r8 + .8585_r8 / ens_size)) - 1.0_r8 - - ! Variance restoration - state_mean = sum(state) / ens_size - restoration_inc = factor * (state - state_mean) - state_inc = state_inc + restoration_inc -endif - !! NOTE: if requested to be returned, correl_out is set further up in the !! code, before the sampling error correction, if enabled, is applied. !! this means it's returning a different larger value than the correl @@ -1527,184 +1606,10 @@ subroutine get_correction_from_table(scorrel, mean_factor, expected_true_correl, end subroutine get_correction_from_table - -subroutine obs_increment_boxcar(ens, ens_size, obs, obs_var, obs_inc, rel_weight) -!------------------------------------------------------------------------ -! -! An observation space update that uses a set of boxcar kernels plus two -! half-gaussians on the wings to represent the prior distribution. If N is -! the ensemble size, 1/(N+1) of the mass is placed between each ensemble -! member. This is reminiscent of the ranked historgram approach for -! evaluating ensembles. The prior distribution on the wings is -! represented by a half gaussian with mean being the outermost ensemble -! member (left or right) and variance being somewhat arbitrarily chosen -! as half the total ensemble sample variance. A particle -! filter like algorithm is then used for the update. The weight associated -! with each prior ensemble member is computed by evaluating the likelihood. -! For the interior, the domain for each boxcar is divided in half and each -! half is associated with the nearest ensemble member. The updated mass in -! each half box is the product of the prior mass and the ensemble weight. -! In the wings, the observation likelihood gaussian is convolved with the -! prior gaussian to get an updated weighted gaussian that is assumed to -! represent the posterior outside of the outermost ensemble members. The -! updated ensemble members are chosen so that 1/(N+1) of the updated -! mass is between each member and also on the left and right wings. This -! algorithm is able to deal well with outliers, bimodality and other -! non-gaussian behavior in observation space. It could also be modified to -! deal with non-gaussian likelihoods in the future. - -integer, intent(in) :: ens_size -real(r8), intent(in) :: ens(ens_size), obs, obs_var -real(r8), intent(out) :: obs_inc(ens_size) -real(r8), intent(out) :: rel_weight(ens_size) - -integer :: i, e_ind(ens_size), lowest_box, j -real(r8) :: sx, prior_mean, prior_var, prior_var_d2 -real(r8) :: var_ratio, new_var, new_sd, umass, left_weight, right_weight -real(r8) :: mass(2*ens_size), weight(ens_size), cumul_mass(0:2*ens_size) -real(r8) :: new_mean_left, new_mean_right, prod_weight_left, prod_weight_right -real(r8) :: new_ens(ens_size), mass_sum, const_term -real(r8) :: x(1:2*ens_size - 1), sort_inc(ens_size) - -! The factor a is not defined for this filter for now (could it be???) - -! The relative weights could be used for a multi-dimensional particle-type -! update using update_ens_from_weights. There are algorithmic challenges -! with outliers so this is not currently a supported option. For now, -! rel_weight is simply set to 0 and is unused elsewhere. -rel_weight = 0.0_r8 - -! Do an index sort of the ensemble members; Need sorted ensemble -call index_sort(ens, e_ind, ens_size) - -! Prior distribution is boxcar in the central bins with 1/(n+1) density -! in each intermediate bin. BUT, distribution on the wings is a normal with -! 1/(n + 1) of the mass on each side. - -! Begin by computing a weight for each of the prior ensemble membersA -! This is just evaluating the gaussian likelihood -const_term = 1.0_r8 / (sqrt(2.0_r8 * PI) * sqrt(obs_var)) -do i = 1, ens_size - weight(i) = const_term * exp(-1.0_r8 * (ens(i) - obs)**2 / (2.0_r8 * obs_var)) -end do - -! Compute the points that bound all the updated mass boxes; start with ensemble -do i = 1, ens_size - x(2*i - 1) = ens(e_ind(i)) -end do -! Compute the mid-point interior boundaries; these are halfway between ensembles -do i = 2, 2*ens_size - 2, 2 - x(i) = (x(i - 1) + x(i + 1)) / 2.0_r8 -end do - -! Compute the s.d. of the ensemble for getting the gaussian wings -sx = sum(ens) -prior_mean = sx / ens_size -prior_var = sum((ens - prior_mean)**2) / (ens_size - 1) - -! Need to normalize the wings so they have 1/(ens_size + 1) mass outside -! Since 1/2 of a normal is outside, need to multiply by 2 / (ens_size + 1) - -! Need some sort of width for the boundary kernel, try 1/2 the VAR for now -prior_var_d2 = prior_var / 2.0_r8 - -! Compute the product of the obs error gaussian with the prior gaussian (EAKF) -! Left wing first -var_ratio = obs_var / (prior_var_d2 + obs_var) -new_var = var_ratio * prior_var_d2 -new_sd = sqrt(new_var) -new_mean_left = var_ratio * (ens(e_ind(1)) + prior_var_d2*obs / obs_var) -new_mean_right = var_ratio * (ens(e_ind(ens_size)) + prior_var_d2*obs / obs_var) -! REMEMBER, this product has an associated weight which must be taken into account -! See Anderson and Anderson for this weight term (or tutorial kernel filter) -prod_weight_left = 2.71828_r8 ** (-0.5_r8 * (ens(e_ind(1))**2 / prior_var_d2 + & - obs**2 / obs_var - new_mean_left**2 / new_var)) / sqrt(2.0_r8 * PI) - -prod_weight_right = 2.71828_r8 ** (-0.5_r8 * (ens(e_ind(ens_size))**2 / prior_var_d2 + & - obs**2 / obs_var - new_mean_right**2 / new_var)) / sqrt(2.0_r8 * PI) - -! Split into 2*ens_size domains; mass in each is computed -! Start by computing mass in the outermost (gaussian) regions -mass(1) = norm_cdf(ens(e_ind(1)), new_mean_left, new_sd) * & - prod_weight_left * (2.0_r8 / (ens_size + 1.0_r8)) -mass(2*ens_size) = (1.0_r8 - norm_cdf(ens(e_ind(ens_size)), new_mean_right, & - new_sd)) * prod_weight_right * (2.0_r8 / (ens_size + 1.0_r8)) - -! Compute mass in the inner half boxes that have ensemble point on the left -do i = 2, 2*ens_size - 2, 2 - mass(i) = (1.0_r8 / (2.0_r8 * (ens_size + 1.0_r8))) * weight(e_ind(i/2)) -end do - -! Now right inner half boxes -do i = 3, 2*ens_size - 1, 2 - mass(i) = (1.0_r8 / (2.0_r8 * (ens_size + 1.0_r8))) * weight(e_ind(i/2 + 1)) -end do - -! Now normalize the mass in the different bins -mass_sum = sum(mass) -mass = mass / mass_sum - -! Find cumulative mass at each box boundary and middle boundary -cumul_mass(0) = 0.0_r8 -do i = 1, 2*ens_size - cumul_mass(i) = cumul_mass(i - 1) + mass(i) -end do - -! Get resampled ensemble, Need 1/(ens_size + 1) between each -umass = 1.0_r8 / (ens_size + 1.0_r8) - -! Begin search at bottom of lowest box, but then update for efficiency -lowest_box = 1 - -! Find each new ensemble members location -do i = 1, ens_size - ! If it's in the inner or outer range have to use normal - if(umass < cumul_mass(1)) then - ! In the first normal box - left_weight = (1.0_r8 / mass_sum) * prod_weight_left * (2.0_r8 / (ens_size + 1.0_r8)) - call weighted_norm_inv(left_weight, new_mean_left, new_sd, umass, new_ens(i)) - else if(umass > cumul_mass(2*ens_size - 1)) then - ! In the last normal box; Come in from the outside - right_weight = (1.0_r8 / mass_sum) * prod_weight_right * (2.0_r8 / (ens_size + 1.0_r8)) - call weighted_norm_inv(right_weight, new_mean_right, new_sd, 1.0_r8 - umass, new_ens(i)) - new_ens(i) = new_mean_right + (new_mean_right - new_ens(i)) - else - ! In one of the inner uniform boxes. - FIND_BOX:do j = lowest_box, 2 * ens_size - 2 - ! Find the box that this mass is in - if(umass >= cumul_mass(j) .and. umass <= cumul_mass(j + 1)) then - new_ens(i) = x(j) + ((umass - cumul_mass(j)) / (cumul_mass(j+1) - cumul_mass(j))) * & - (x(j + 1) - x(j)) - ! Don't need to search lower boxes again - lowest_box = j - exit FIND_BOX - end if - end do FIND_BOX - endif - ! Want equally partitioned mass in update with exception that outermost boxes have half - umass = umass + 1.0_r8 / (ens_size + 1.0_r8) -end do - -! Can now compute sorted increments -do i = 1, ens_size - sort_inc(i) = new_ens(i) - ens(e_ind(i)) -end do - -! Now, need to convert to increments for unsorted -do i = 1, ens_size - obs_inc(e_ind(i)) = sort_inc(i) -end do - -end subroutine obs_increment_boxcar - - - subroutine obs_increment_rank_histogram(ens, ens_size, prior_var, & obs, obs_var, obs_inc) !------------------------------------------------------------------------ ! -! Revised 14 November 2008 -! ! Does observation space update by approximating the prior distribution by ! a rank histogram. Prior and posterior are assumed to have 1/(n+1) probability ! mass between each ensemble member. The tails are assumed to be gaussian with @@ -1730,16 +1635,13 @@ subroutine obs_increment_rank_histogram(ens, ens_size, prior_var, & ! and new ensemble members are located so that 1/(n+1) of the mass is between ! each member and on the tails. -! This code is still under development. Please contact Jeff Anderson at -! jla@ucar.edu if you are interested in trying it. - integer, intent(in) :: ens_size real(r8), intent(in) :: ens(ens_size), prior_var, obs, obs_var real(r8), intent(out) :: obs_inc(ens_size) integer :: i, e_ind(ens_size), lowest_box, j -real(r8) :: prior_sd, var_ratio, umass, left_amp, right_amp -real(r8) :: left_sd, left_var, right_sd, right_var, left_mean, right_mean +real(r8) :: prior_sd, var_ratio, umass, left_amp, right_amp, norm_const +real(r8) :: left_mean, right_mean real(r8) :: mass(ens_size + 1), like(ens_size), cumul_mass(0:ens_size + 1) real(r8) :: nmass(ens_size + 1) real(r8) :: new_mean_left, new_mean_right, prod_weight_left, prod_weight_right @@ -1752,20 +1654,16 @@ subroutine obs_increment_rank_histogram(ens, ens_size, prior_var, & ! Do an index sort of the ensemble members; Will want to do this very efficiently call index_sort(ens, e_ind, ens_size) +x = ens(e_ind) +! Define normal PDF constant term +norm_const = 1.0_r8 / sqrt(2.0_r8 * PI * obs_var) +! Compute likelihood for each ensemble member; just evaluate the gaussian do i = 1, ens_size - ! The boundaries of the interior bins are just the sorted ensemble members - x(i) = ens(e_ind(i)) - ! Compute likelihood for each ensemble member; just evaluate the gaussian - ! No need to compute the constant term since relative likelihood is what matters - like(i) = exp(-1.0_r8 * (x(i) - obs)**2 / (2.0_r8 * obs_var)) + like(i) = norm_const * exp(-1.0_r8 * (x(i) - obs)**2 / (2.0_r8 * obs_var)) end do -! Prior distribution is boxcar in the central bins with 1/(n+1) density -! in each intermediate bin. BUT, distribution on the tails is a normal with -! 1/(n + 1) of the mass on each side. - -! Can now compute the mean likelihood density in each interior bin +! Compute approx likelihood each interior bin (average of bounding likelihoods) do i = 2, ens_size like_dense(i) = ((like(i - 1) + like(i)) / 2.0_r8) end do @@ -1775,64 +1673,55 @@ subroutine obs_increment_rank_histogram(ens, ens_size, prior_var, & ! For unit normal, find distance from mean to where cdf is 1/(n+1) ! Lots of this can be done once in first call and then saved -call weighted_norm_inv(1.0_r8, 0.0_r8, 1.0_r8, & - 1.0_r8 / (ens_size + 1.0_r8), dist_for_unit_sd) +dist_for_unit_sd = inv_weighted_normal_cdf(1.0_r8, 0.0_r8, 1.0_r8, & + 1.0_r8 / (ens_size + 1.0_r8)) dist_for_unit_sd = -1.0_r8 * dist_for_unit_sd ! Have variance of tails just be sample prior variance ! Mean is adjusted so that 1/(n+1) is outside left_mean = x(1) + dist_for_unit_sd * prior_sd -left_var = prior_var -left_sd = prior_sd ! Same for right tail right_mean = x(ens_size) - dist_for_unit_sd * prior_sd -right_var = prior_var -right_sd = prior_sd if(gaussian_likelihood_tails) then !*************** Block to do Gaussian-Gaussian on tail ************** ! Compute the product of the obs likelihood gaussian with the priors ! Left tail gaussian first - var_ratio = obs_var / (left_var + obs_var) - new_var_left = var_ratio * left_var + var_ratio = obs_var / (prior_var + obs_var) + new_var_left = var_ratio * prior_var new_sd_left = sqrt(new_var_left) - new_mean_left = var_ratio * (left_mean + left_var*obs / obs_var) + new_mean_left = var_ratio * (left_mean + prior_var*obs / obs_var) ! REMEMBER, this product has an associated weight which must be taken into account ! See Anderson and Anderson for this weight term (or tutorial kernel filter) - ! NOTE: The constant term has been left off the likelihood so we don't have - ! to divide by sqrt(2 PI) in this expression - prod_weight_left = exp(-0.5_r8 * (left_mean**2 / left_var + & + prod_weight_left = exp(-0.5_r8 * (left_mean**2 / prior_var + & obs**2 / obs_var - new_mean_left**2 / new_var_left)) / & - sqrt(left_var + obs_var) + sqrt(prior_var + obs_var) / sqrt(2.0_r8 * PI) ! Determine how much mass is in the updated tails by computing gaussian cdf - mass(1) = norm_cdf(x(1), new_mean_left, new_sd_left) * prod_weight_left + mass(1) = normal_cdf(x(1), new_mean_left, new_sd_left) * prod_weight_left ! Same for the right tail - var_ratio = obs_var / (right_var + obs_var) - new_var_right = var_ratio * right_var + var_ratio = obs_var / (prior_var + obs_var) + new_var_right = var_ratio * prior_var new_sd_right = sqrt(new_var_right) - new_mean_right = var_ratio * (right_mean + right_var*obs / obs_var) - ! NOTE: The constant term has been left off the likelihood so we don't have - ! to divide by sqrt(2 PI) in this expression - prod_weight_right = exp(-0.5_r8 * (right_mean**2 / right_var + & + new_mean_right = var_ratio * (right_mean + prior_var*obs / obs_var) + prod_weight_right = exp(-0.5_r8 * (right_mean**2 / prior_var + & obs**2 / obs_var - new_mean_right**2 / new_var_right)) / & - sqrt(right_var + obs_var) + sqrt(prior_var + obs_var) / sqrt(2.0_r8 * PI) ! Determine how much mass is in the updated tails by computing gaussian cdf - mass(ens_size + 1) = (1.0_r8 - norm_cdf(x(ens_size), new_mean_right, & - new_sd_right)) * prod_weight_right + mass(ens_size + 1) = (1.0_r8 - normal_cdf(x(ens_size), new_mean_right, new_sd_right)) * & + prod_weight_right + !************ End Block to do Gaussian-Gaussian on tail ************** else !*************** Block to do flat tail for likelihood **************** ! Flat tails: THIS REMOVES ASSUMPTIONS ABOUT LIKELIHOOD AND CUTS COST - new_var_left = left_var - new_sd_left = left_sd + new_sd_left = prior_sd new_mean_left = left_mean prod_weight_left = like(1) mass(1) = like(1) / (ens_size + 1.0_r8) ! Same for right tail - new_var_right = right_var - new_sd_right = right_sd + new_sd_right = prior_sd new_mean_right = right_mean prod_weight_right = like(ens_size) mass(ens_size + 1) = like(ens_size) / (ens_size + 1.0_r8) @@ -1885,13 +1774,13 @@ subroutine obs_increment_rank_histogram(ens, ens_size, prior_var, & if(umass < cumul_mass(1)) then ! It's in the left tail ! Get position of x in weighted gaussian where the cdf has value umass - call weighted_norm_inv(left_amp, new_mean_left, new_sd_left, & - umass, new_ens(i)) + new_ens(i) = inv_weighted_normal_cdf(left_amp, new_mean_left, new_sd_left, & + umass) else if(umass > cumul_mass(ens_size)) then ! It's in the right tail ! Get position of x in weighted gaussian where the cdf has value umass - call weighted_norm_inv(right_amp, new_mean_right, new_sd_right, & - 1.0_r8 - umass, new_ens(i)) + new_ens(i) = inv_weighted_normal_cdf(right_amp, new_mean_right, new_sd_right, & + 1.0_r8 - umass) ! Coming in from the right, use symmetry after pretending its on left new_ens(i) = new_mean_right + (new_mean_right - new_ens(i)) else @@ -2009,8 +1898,8 @@ subroutine update_ens_from_weights(ens, ens_size, rel_weight, ens_inc) ! Need to normalize the wings so they have 1/(2*ens_size) mass outside ! Use cdf to find out how much mass is left of 1st member, right of last -total_mass_left = norm_cdf(ens(e_ind(1)), prior_mean, prior_sd) -total_mass_right = 1.0_r8 - norm_cdf(ens(e_ind(ens_size)), prior_mean, prior_sd) +total_mass_left = normal_cdf(ens(e_ind(1)), prior_mean, prior_sd) +total_mass_right = 1.0_r8 - normal_cdf(ens(e_ind(ens_size)), prior_mean, prior_sd) ! Find the mass in each division given the initial equal partition and the weights updated_mass(1) = rel_weight(e_ind(1)) / (2.0_r8 * ens_size) @@ -2051,10 +1940,10 @@ subroutine update_ens_from_weights(ens, ens_size, rel_weight, ens_inc) ! If it's in the inner or outer range have to use normal if(mass < cumul_mass(1)) then ! In the first normal box - call weighted_norm_inv(alpha(1), prior_mean, prior_sd, mass, new_ens(i)) + new_ens(i) = inv_weighted_normal_cdf(alpha(1), prior_mean, prior_sd, mass) else if(mass > cumul_mass(2*ens_size - 1)) then ! In the last normal box; Come in from the outside - call weighted_norm_inv(alpha(2), prior_mean, prior_sd, 1.0_r8 - mass, new_ens(i)) + new_ens(i) = inv_weighted_normal_cdf(alpha(2), prior_mean, prior_sd, 1.0_r8 - mass) new_ens(i) = prior_mean + (prior_mean - new_ens(i)) else ! In one of the inner uniform boxes. Make this much more efficient search? @@ -2185,130 +2074,6 @@ function cov_and_impact_factors(base_obs_loc, base_obs_type, state_loc, state_ki end function cov_and_impact_factors -!------------------------------------------------------------------------ - -function norm_cdf(x_in, mean, sd) - -! Approximate cumulative distribution function for normal -! with mean and sd evaluated at point x_in -! Only works for x>= 0. - -real(r8) :: norm_cdf -real(r8), intent(in) :: x_in, mean, sd - -real(digits12) :: x, p, b1, b2, b3, b4, b5, t, density, nx - -! Convert to a standard normal -nx = (x_in - mean) / sd - -x = abs(nx) - - -! Use formula from Abramowitz and Stegun to approximate -p = 0.2316419_digits12 -b1 = 0.319381530_digits12 -b2 = -0.356563782_digits12 -b3 = 1.781477937_digits12 -b4 = -1.821255978_digits12 -b5 = 1.330274429_digits12 - -t = 1.0_digits12 / (1.0_digits12 + p * x) - -density = (1.0_digits12 / sqrt(2.0_digits12 * PI)) * exp(-x*x / 2.0_digits12) - -norm_cdf = 1.0_digits12 - density * & - ((((b5 * t + b4) * t + b3) * t + b2) * t + b1) * t - -if(nx < 0.0_digits12) norm_cdf = 1.0_digits12 - norm_cdf - -!write(*, *) 'cdf is ', norm_cdf - -end function norm_cdf - - -!------------------------------------------------------------------------ - -subroutine weighted_norm_inv(alpha, mean, sd, p, x) - -! Find the value of x for which the cdf of a N(mean, sd) multiplied times -! alpha has value p. - -real(r8), intent(in) :: alpha, mean, sd, p -real(r8), intent(out) :: x - -real(r8) :: np - -! Can search in a standard normal, then multiply by sd at end and add mean -! Divide p by alpha to get the right place for weighted normal -np = p / alpha - -! Find spot in standard normal -call norm_inv(np, x) - -! Add in the mean and normalize by sd -x = mean + x * sd - -end subroutine weighted_norm_inv - - -!------------------------------------------------------------------------ - -subroutine norm_inv(p, x) - -real(r8), intent(in) :: p -real(r8), intent(out) :: x - -! normal inverse -! translate from http://home.online.no/~pjacklam/notes/invnorm -! a routine written by john herrero - -real(r8) :: p_low,p_high -real(r8) :: a1,a2,a3,a4,a5,a6 -real(r8) :: b1,b2,b3,b4,b5 -real(r8) :: c1,c2,c3,c4,c5,c6 -real(r8) :: d1,d2,d3,d4 -real(r8) :: q,r -a1 = -39.69683028665376_digits12 -a2 = 220.9460984245205_digits12 -a3 = -275.9285104469687_digits12 -a4 = 138.357751867269_digits12 -a5 = -30.66479806614716_digits12 -a6 = 2.506628277459239_digits12 -b1 = -54.4760987982241_digits12 -b2 = 161.5858368580409_digits12 -b3 = -155.6989798598866_digits12 -b4 = 66.80131188771972_digits12 -b5 = -13.28068155288572_digits12 -c1 = -0.007784894002430293_digits12 -c2 = -0.3223964580411365_digits12 -c3 = -2.400758277161838_digits12 -c4 = -2.549732539343734_digits12 -c5 = 4.374664141464968_digits12 -c6 = 2.938163982698783_digits12 -d1 = 0.007784695709041462_digits12 -d2 = 0.3224671290700398_digits12 -d3 = 2.445134137142996_digits12 -d4 = 3.754408661907416_digits12 -p_low = 0.02425_digits12 -p_high = 1_digits12 - p_low -! Split into an inner and two outer regions which have separate fits -if(p < p_low) then - q = sqrt(-2.0_digits12 * log(p)) - x = (((((c1*q + c2)*q + c3)*q + c4)*q + c5)*q + c6) / & - ((((d1*q + d2)*q + d3)*q + d4)*q + 1.0_digits12) -else if(p > p_high) then - q = sqrt(-2.0_digits12 * log(1.0_digits12 - p)) - x = -(((((c1*q + c2)*q + c3)*q + c4)*q + c5)*q + c6) / & - ((((d1*q + d2)*q + d3)*q + d4)*q + 1.0_digits12) -else - q = p - 0.5_digits12 - r = q*q - x = (((((a1*r + a2)*r + a3)*r + a4)*r + a5)*r + a6)*q / & - (((((b1*r + b2)*r + b3)*r + b4)*r + b5)*r + 1.0_digits12) -endif - -end subroutine norm_inv - !------------------------------------------------------------------------ subroutine set_assim_tools_trace(execution_level, timestamp_level) @@ -2648,29 +2413,6 @@ subroutine log_namelist_selections(num_special_cutoff, cache_override) integer :: i -select case (filter_kind) - case (1) - msgstring = 'Ensemble Adjustment Kalman Filter (EAKF)' - case (2) - msgstring = 'Ensemble Kalman Filter (ENKF)' - case (3) - msgstring = 'Kernel filter' - case (4) - msgstring = 'observation space particle filter' - case (5) - msgstring = 'random draw from posterior' - case (6) - msgstring = 'deterministic draw from posterior with fixed kurtosis' - case (7) - msgstring = 'Boxcar' - case (8) - msgstring = 'Rank Histogram Filter' - case default - call error_handler(E_ERR, 'assim_tools_init:', 'illegal filter_kind value, valid values are 1-8', & - source) -end select -call error_handler(E_MSG, 'assim_tools_init:', 'Selected filter type is '//trim(msgstring)) - if (adjust_obs_impact) then call allocate_impact_table(obs_impact_table) call read_impact_table(obs_impact_filename, obs_impact_table, allow_any_impact_values, "allow_any_impact_values") diff --git a/assimilation_code/modules/assimilation/assim_tools_mod.nml b/assimilation_code/modules/assimilation/assim_tools_mod.nml index a8a5c11d2a..b0a0b76d71 100644 --- a/assimilation_code/modules/assimilation/assim_tools_mod.nml +++ b/assimilation_code/modules/assimilation/assim_tools_mod.nml @@ -8,18 +8,15 @@ # enabling sampling error correction is generally beneficial # the default file is in assimilation_code/programs/gen_sampling_err_table/work -# With a deterministic filter (filter_kind == 1 or 8) -# and a deterministic inflation (filter_nml:inf_deterministic == .true.) -# sort_obs_inc is not needed and is expensive. Should be .false. +# sort_obs_inc applies to ENKF only. # specify special localization items in the same order # in both lists, the same number of items &assim_tools_nml - filter_kind = 1 cutoff = 0.2 distribute_mean = .false. - sort_obs_inc = .false. + sort_obs_inc = .true. spread_restoration = .false. sampling_error_correction = .false. adaptive_localization_threshold = -1 diff --git a/assimilation_code/modules/assimilation/assim_tools_mod.rst b/assimilation_code/modules/assimilation/assim_tools_mod.rst index 88d0923164..502e2419de 100644 --- a/assimilation_code/modules/assimilation/assim_tools_mod.rst +++ b/assimilation_code/modules/assimilation/assim_tools_mod.rst @@ -10,25 +10,6 @@ for both mean and spread. In addition, algorithms to do a variety of flavors of particle filter, and kernel filters are included. The parallel implementation that allows each observation to update all state variables that are close to it at the same time is described in Anderson and Collins, 2007. -Filter types ------------- - -Available observation space filter types include: - -- 1 = EAKF (Ensemble Adjustment Kalman Filter, see Anderson 2001) -- 2 = ENKF (Ensemble Kalman Filter) -- 3 = Kernel filter -- 4 = Observation Space Particle filter -- 5 = Random draw from posterior (contact dart@ucar.edu before using) -- 6 = Deterministic draw from posterior with fixed kurtosis (ditto) -- 7 = Boxcar kernel filter -- 8 = Rank Histogram filter (see Anderson 2010) -- 9 = Particle filter (see Poterjoy 2016) - -We recommend using type=1, the EAKF. Note that although the algorithm is expressed in a slightly different form, the -EAKF is identical to the EnSRF (Ensemble Square Root Filter) described by Whitaker and Hamill in 2002. Highly -non-gaussian distributions may get better results from type=8, Rank Histogram filter. - Localization ------------ @@ -169,10 +150,9 @@ namelist. :: &assim_tools_nml - filter_kind = 1 cutoff = 0.2 distribute_mean = .false. - sort_obs_inc = .false. + sort_obs_inc = .true. spread_restoration = .false. sampling_error_correction = .false. adaptive_localization_threshold = -1 @@ -195,36 +175,6 @@ namelist. Description of each namelist entry ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -``filter_kind`` - *type:* integer - - Selects the variant of filter to be used. - - - 1 = EAKF (Ensemble Adjustment Kalman Filter, see Anderson 2001) - - 2 = ENKF (Ensemble Kalman Filter) - - 3 = Kernel filter - - 4 = Observation Space Particle filter - - 5 = Random draw from posterior (contact dart@ucar.edu before using) - - 6 = Deterministic draw from posterior with fixed kurtosis (ditto) - - 7 = Boxcar kernel filter - - 8 = Rank Histogram filter (see Anderson 2010) - - 9 = Particle filter (see Poterjoy 2016) - - The EAKF is the most commonly used filter. Note that although the algorithm is expressed in a slightly different - form, the EAKF is identical to the EnSRF (Ensemble Square Root Filter) described by Whitaker and Hamill in 2002. - - The Rank Histgram filter can be more successful for highly nongaussian distributions. - - Jon Poterjoy's Particle filter is included with this code release. To use, it, overwrite ``assim_tools_mod.f90`` with - ``assim_tools_mod.pf.f90`` and rebuild filter. - - :: - - - $ mv assimilation_code/modules/assimilation/assim_tools_mod.pf.f90 assimilation_code/modules/assimilation/assim_tools_mod.f90 - - There are additional namelist items in this version specific to the particle filter. Read the code for more details. - ``cutoff`` *type:* real(r8) @@ -245,10 +195,11 @@ Description of each namelist entry *type:* logical If true, the final increments from obs_increment are sorted so that the mean increment value is as small as possible. - This minimizes regression errors when non-deterministic filters or error correction algorithms are applied. HOWEVER, - when using deterministic filters (filter_kind == 1 or 8) with no inflation or a combination of a determinstic filter + Applies to ENKF only. + ``sort_obs_inc`` minimizes regression errors when non-deterministic filters or error correction algorithms are applied. HOWEVER, + when using deterministic filters with no inflation or a combination of a determinstic filter and deterministic inflation (filter_nml:inf_deterministic = .TRUE.) sorting the increments is both unnecessary and - expensive. A warning is printed to stdout and the log and the sorting is skipped. + expensive. ``spread_restoration`` *type:* logical @@ -256,6 +207,11 @@ Description of each namelist entry True turns on algorithm to restore amount of spread that would be expected to be lost if underlying obs/state variable correlation were really 0. +.. Warning:: + + ``spread_restoration`` is not supported in this version, please reach out to the DAReS team dart@ucar.edu + if you need to use spread_restoration. + ``sampling_error_correction`` *type:* logical @@ -307,12 +263,12 @@ Description of each namelist entry ``rectangular_quadrature`` *type:* logical - Only relevant for filter type 8 and recommended to leave ``.true.``. + Only relevant for filter type UNBOUNDED_RHF and recommended to leave ``.true.``. ``gaussian_likelihood_tails`` *type:* logical - Only relevant for filter type 8 and recommended to leave ``.false.``. + Only relevant for filter type UNBOUNDED_RHF and recommended to leave ``.false.``. ``close_obs_caching`` *type:* logical diff --git a/assimilation_code/modules/assimilation/beta_distribution_mod.f90 b/assimilation_code/modules/assimilation/beta_distribution_mod.f90 new file mode 100644 index 0000000000..eec0a327c3 --- /dev/null +++ b/assimilation_code/modules/assimilation/beta_distribution_mod.f90 @@ -0,0 +1,402 @@ +! DART software - Copyright UCAR. This open source software is provided +! by UCAR, "as is", without charge, subject to all terms of use at +! http://www.image.ucar.edu/DAReS/DART/DART_download + +! Thanks to Chris Riedel who developed the methods in this module. + +module beta_distribution_mod + +use types_mod, only : r8, PI, missing_r8 + +use utilities_mod, only : E_ERR, error_handler + +use random_seq_mod, only : random_seq_type, random_uniform + +use distribution_params_mod, only : distribution_params_type + +use normal_distribution_mod, only : inv_cdf + +implicit none +private + +public :: beta_cdf, inv_beta_cdf, & + beta_cdf_params, inv_beta_cdf_params, & + beta_pdf, random_beta, test_beta, set_beta_params_from_ens + +character(len=512) :: errstring +character(len=*), parameter :: source = 'beta_distribution_mod.f90' + +real(r8), parameter :: failed_value = -99.9_r8 + +contains + +!----------------------------------------------------------------------- + +subroutine test_beta + +! This routine provides limited tests of the numerics in this module. It begins +! by comparing a handful of cases of the pdf and cdf to results from Matlab. It +! then tests the quality of the inverse cdf for a single shape/scale pair. Failing +! these tests suggests a serious problem. Passing them does not indicate that +! there are acceptable results for all possible inputs. + +real(r8) :: x, y, p, inv +real(r8) :: alpha, beta, max_diff +integer :: i + +! Comparative results for a handful of cases from MATLAB21a +real(r8) :: pdf_diff(7), cdf_diff(7) +real(r8) :: malpha(7) = [0.5_r8, 5.0_r8, 1.0_r8, 2.0_r8, 2.0_r8, 5.0_r8, 0.5_r8] +real(r8) :: mbeta(7) = [0.5_r8, 1.0_r8, 3.0_r8, 2.0_r8, 5.0_r8, 2.0_r8, 1.0_r8] +real(r8) :: mx(7) = [0.1_r8, 0.3_r8, 0.5_r8, 0.6_r8, 0.7_r8, 0.8_r8, 0.9_r8] +! Generated by matlab betapdf(mx, malpha, mbeta) +real(r8) :: mpdf(7) = [1.061032953945969_r8, 0.040500000000000_r8, 0.750000000000000_r8, & + 1.440000000000000_r8, 0.170100000000000_r8, 2.457600000000000_r8, & + 0.527046276694730_r8] +! Generated by matlab gamcdf(mx, malpha, mbeta) +real(r8) :: mcdf(7) = [0.204832764699133_r8, 0.002430000000000_r8, 0.875000000000000_r8, & + 0.648000000000000_r8, 0.989065000000000_r8, 0.655360000000000_r8, & + 0.948683298050514_r8] + +! Compare to matlab +write(*, *) 'Absolute value of differences should be less than 1e-15' +do i = 1, 7 + pdf_diff(i) = beta_pdf(mx(i), malpha(i), mbeta(i)) - mpdf(i) + cdf_diff(i) = beta_cdf(mx(i), malpha(i), mbeta(i), 0.0_r8, 1.0_r8) - mcdf(i) + write(*, *) i, pdf_diff(i), cdf_diff(i) +end do + +! Test many x values for cdf and inverse cdf for a single set of alpha and beta +alpha = 5.0_r8 +beta = 2.0_r8 + +max_diff = -1.0_r8 +do i = 0, 1000 + x = i / 1000.0_r8 + p = beta_pdf(x, alpha, beta) + y = beta_cdf(x, alpha, beta, 0.0_r8, 1.0_r8) + inv = inv_beta_cdf(y, alpha, beta, 0.0_r8, 1.0_r8) + max_diff = max(abs(x - inv), max_diff) +end do + +write(*, *) '----------------------------' +write(*, *) 'max difference in inversion is ', max_diff +write(*, *) 'max difference should be less than 1e-14' + +end subroutine test_beta + +!----------------------------------------------------------------------- + +function inv_beta_cdf_params(quantile, p) result(x) + +real(r8) :: x +real(r8), intent(in) :: quantile +type(distribution_params_type), intent(in) :: p + +x = inv_cdf(quantile, beta_cdf_params, inv_beta_first_guess_params, p) + +end function inv_beta_cdf_params + +!----------------------------------------------------------------------- + +function inv_beta_cdf(quantile, alpha, beta, lower_bound, upper_bound) result(x) + +real(r8) :: x +real(r8), intent(in) :: quantile +real(r8), intent(in) :: alpha, beta +real(r8), intent(in) :: lower_bound, upper_bound + +! Given a quantile, finds the value of x for which the scaled beta cdf +! with alpha and beta has approximately this quantile + +type(distribution_params_type) :: p + +if (alpha <= 0.0_r8 .or. beta <= 0.0_r8) then + errstring = 'Negative input beta parameters' + call error_handler(E_ERR, 'inv_beta_cdf', errstring, source) +endif + +p%params(1) = alpha; p%params(2) = beta +! Beta must be bounded on both sides +p%lower_bound = lower_bound; p%upper_bound = upper_bound + +x = inv_beta_cdf_params(quantile, p) + +! Undo the scaling +x = x * (upper_bound - lower_bound) + lower_bound + +end function inv_beta_cdf + +!--------------------------------------------------------------------------- + +function beta_pdf(x, alpha, beta) + +! Returns the probability density of a beta function with alpha and beta +! at the value x + +! Returns a large negative value if called with illegal values + +real(r8) :: beta_pdf +real(r8), intent(in) :: x, alpha, beta + +real(r8) :: gamma_ratio + +! Parameters alpha and beta must be positive +if(alpha <= 0.0_r8 .or. beta <= 0.0_r8) then + beta_pdf = failed_value +elseif(x < 0.0 .or. x > 1.0_r8) then + beta_pdf = failed_value +elseif(alpha == 1.0_r8 .and. x == 0.0_r8) then + ! Tricky stuff for x = 0 or 1; + beta_pdf = beta +elseif(beta == 1.0_r8 .and. x == 1.0_r8) then + beta_pdf = alpha +elseif(alpha < 1.0_r8 .and. x == 0.0_r8) then + beta_pdf = failed_value +elseif(beta < 1.0_r8 .and. x == 1.0_r8) then + beta_pdf = failed_value +else + ! Use definition via gammas since this is a Fortran intrinsic + gamma_ratio = gamma(alpha) * gamma(beta) / gamma(alpha + beta) + beta_pdf = x**(alpha - 1.0_r8) * (1.0_r8 - x)**(beta - 1.0_r8) / gamma_ratio +endif + +end function beta_pdf + +!--------------------------------------------------------------------------- + +function beta_cdf_params(x, p) + +real(r8) :: beta_cdf_params +real(r8), intent(in) :: x +type(distribution_params_type), intent(in) :: p + +real(r8) :: alpha, beta + +alpha = p%params(1); beta = p%params(2) +beta_cdf_params = beta_cdf(x, alpha, beta, p%lower_bound, p%upper_bound) + +end function beta_cdf_params + +!--------------------------------------------------------------------------- + +function beta_cdf(x, alpha, beta, lower_bound, upper_bound) + +! Returns the cumulative distribution of a beta function with alpha and beta +! at the value x + +! Returns a large negative value if called with illegal values + +real(r8) :: beta_cdf +real(r8), intent(in) :: x, alpha, beta +real(r8), intent(in) :: lower_bound, upper_bound + +! Parameters must be positive +if(alpha <= 0.0_r8 .or. beta <= 0.0_r8) then + beta_cdf = failed_value +elseif(x < 0.0_r8 .or. x > 1.0_r8) then + ! x must be in 0 1 + beta_cdf = failed_value +elseif(x == 0.0_r8) then + beta_cdf = 0.0_r8 +elseif(x == 1.0_r8) then + beta_cdf = 1.0_r8 +elseif (x > (alpha + 1.0_r8)/(alpha + beta + 2.0_r8)) then + beta_cdf = (1.0_r8 - incomplete_beta(beta, alpha, 1.0_r8 - x)) +else + beta_cdf = incomplete_beta(alpha, beta, x) +endif + +end function beta_cdf + +!--------------------------------------------------------------------------- + +function random_beta(r, alpha, beta) + +! Note that this provides same qualitative functionality as a similarly named +! routine in the random_seq_mod that uses a rejection algorithm. However, once +! we have an inverse cdf function for a distribution, it is possible to generate +! random numbers by first getting a draw from a U(0, 1) and then inverting these +! quantiles to get an actual value + +type(random_seq_type), intent(inout) :: r +real(r8), intent(in) :: alpha +real(r8), intent(in) :: beta +real(r8) :: random_beta + +real(r8) :: quantile +if (alpha <= 0.0_r8) then + write(errstring, *) 'Alpha parameter must be positive, was ', alpha + call error_handler(E_ERR, 'random_beta', errstring, source) +endif + +if (beta <= 0.0_r8) then + write(errstring, *) 'Beta parameter must be positive, was ', beta + call error_handler(E_ERR, 'random_beta', errstring, source) +endif + +! Draw from U(0, 1) to get a quantile +quantile = random_uniform(r) +! Invert cdf to get a draw from beta +random_beta = inv_beta_cdf(quantile, alpha, beta, 0.0_r8, 1.0_r8) + +end function random_beta + +!--------------------------------------------------------------------------- + +function incomplete_beta(a,b,x) + +! Computes an approximation of the incomplete beta integral using the continued +! fraction evaluation routine also found in numerical recipes + +real(r8) :: incomplete_beta +real(r8), intent(in) :: a, b, x + +real(r8), parameter :: TINY = 1.0e-30 +real(r8), parameter :: STOP = 1.0e-14 +integer, parameter :: max_iter = 100 + +real(r8) :: front, f, c, d, numerator, cd +integer :: m, iter + +if (x < 0.0_r8 .or. x > 1.0_r8) then + errstring = 'Input value for x is not between 0 - 1' + call error_handler(E_ERR, 'incomplete_beta', errstring, source) +endif + +! Set a default failed value +incomplete_beta = missing_r8 + +front = exp(log(x)*a + log(1.0_r8-x)*b - log_beta(a, b)) / a +f = 1.0_r8 +c = 1.0_r8 +d = 0.0_r8 + +do iter = 0, max_iter + m = floor(iter/2.0_r8) + + ! Initial step numerator is 1 + if (iter == 0) then + numerator = 1.0_r8 + ! Algorithm has an odd and even iteration step + else if (mod(iter, 2) == 0) then + numerator = (m*(b - m)*x)/((a + 2.0_r8*m - 1.0_r8)*(a + 2.0_r8*m)) + else + numerator = -((a + m)*(a + b + m)*x)/((a + 2.0_r8*m)*(a + 2.0_r8*m + 1.0_r8)) + end if + + d = 1.0_r8 + (numerator * d) + if (abs(d) < TINY) d = TINY + d = 1.0_r8 / d + + c = 1.0_r8 + (numerator/c) + if (abs(c) < TINY) c = TINY + + cd = c*d + f = cd*f + + if (abs(1.0_r8 - cd) < STOP) then + incomplete_beta = front * (f - 1.0_r8) + return + end if +end do + +! Error if failed to converge +errstring = 'Alg. did not converge' +call error_handler(E_ERR, 'incomplete_beta', errstring, source) + +end function incomplete_beta + +!--------------------------------------------------------------------------- + +function log_beta(a, b) + +real(r8) :: log_beta +real(r8), intent(in) :: a, b + +log_beta = log(gamma(a)) + log(gamma(b)) - log(gamma(a + b)) + +end function log_beta + +!--------------------------------------------------------------------------- + +function inv_beta_first_guess_params(quantile, p) + +real(r8) :: inv_beta_first_guess_params +real(r8), intent(in) :: quantile +type(distribution_params_type), intent(in) :: p + +real(r8) :: alpha, beta + +alpha = p%params(1); beta = p%params(2) +inv_beta_first_guess_params = inv_beta_first_guess(quantile, alpha, beta, & + p%bounded_below, p%bounded_above, p%lower_bound, p%upper_bound) + +end function inv_beta_first_guess_params + +!--------------------------------------------------------------------------- + +function inv_beta_first_guess(x, alpha, beta, & + bounded_below, bounded_above, lower_bound, upper_bound) + +real(r8) :: inv_beta_first_guess +real(r8), intent(in) :: x +real(r8), intent(in) :: alpha, beta +logical, intent(in) :: bounded_below, bounded_above +real(r8), intent(in) :: lower_bound, upper_bound + +! Need some sort of first guess, should be smarter here +! For starters, take the mean for this alpha and beta +inv_beta_first_guess = alpha/(alpha + beta) + +end function inv_beta_first_guess + +!--------------------------------------------------------------------------- + +subroutine beta_alpha_beta(x, num, alpha, beta) + +! Computes the alpha and beta parameters for a beta distribution from an ensemble +! Assumes the ensemble members are confined to [0, 1] +! This may not be the maximum likelihood estimate + +integer, intent(in) :: num +real(r8), intent(in) :: x(num) +real(r8), intent(out) :: alpha +real(r8), intent(out) :: beta + +real(r8) :: mean, variance + +mean = sum(x) / num +variance = sum((x - mean)**2) / (num- 1) +! Get alpha and beta +alpha = mean**2 * (1.0_r8 - mean) / variance - mean +beta = alpha * (1.0_r8 / mean - 1.0_r8) + +end subroutine beta_alpha_beta + +!--------------------------------------------------------------------------- + +subroutine set_beta_params_from_ens(ens, num, lower_bound, upper_bound, p) + +integer, intent(in) :: num +real(r8), intent(in) :: ens(num) +real(r8), intent(in) :: lower_bound, upper_bound +type(distribution_params_type), intent(inout) :: p + +real(r8) :: alpha, beta + +! Set the bounds info +p%lower_bound = lower_bound; p%upper_bound = upper_bound + +! Get alpha and beta for the scaled ensemble +call beta_alpha_beta(ens, num, alpha, beta) +p%params(1) = alpha +p%params(2) = beta + +end subroutine set_beta_params_from_ens + +!--------------------------------------------------------------------------- + + + +end module beta_distribution_mod diff --git a/assimilation_code/modules/assimilation/bnrh_distribution_mod.f90 b/assimilation_code/modules/assimilation/bnrh_distribution_mod.f90 new file mode 100644 index 0000000000..8034f58467 --- /dev/null +++ b/assimilation_code/modules/assimilation/bnrh_distribution_mod.f90 @@ -0,0 +1,801 @@ +! DART software - Copyright UCAR. This open source software is provided +! by UCAR, "as is", without charge, subject to all terms of use at +! http://www.image.ucar.edu/DAReS/DART/DART_download + +module bnrh_distribution_mod + +use types_mod, only : r8, missing_r8 + +use utilities_mod, only : E_ERR, E_MSG, error_handler + +use sort_mod, only : index_sort + +use normal_distribution_mod, only : normal_cdf, inv_std_normal_cdf, inv_weighted_normal_cdf, & + normal_mean_sd + +use distribution_params_mod, only : distribution_params_type + +implicit none +private + +public :: bnrh_cdf, bnrh_cdf_params, bnrh_cdf_initialized_vector, & + inv_bnrh_cdf, inv_bnrh_cdf_params, get_bnrh_sd, inv_bnrh_cdf_like + +character(len=512) :: errstring +character(len=*), parameter :: source = 'bnrh_distribution_mod.f90' + +! Saves the ensemble size used in the previous call of bnrh_cdf +integer :: saved_ens_size = -99 +! Cached value of dist_for_unit_sd for this saved_ens_size +real(r8), save :: dist_for_unit_sd + +! Parameter to control switch to uniform approximation for normal tail +! This defines how many quantiles the bound is from the outermost ensemble member +! If closer than this, can get into precision error problems with F(F-1(X)) on tails +! Can also get other precision problems with the amplitude for the normal in the bounded region +real(r8), parameter :: uniform_threshold = 0.01_r8 + +contains + +!----------------------------------------------------------------------- +subroutine bnrh_cdf_params(x, ens_size, bounded_below, bounded_above, & + lower_bound, upper_bound, p, quantiles) + +integer, intent(in) :: ens_size +real(r8), intent(in) :: x(ens_size) +logical, intent(in) :: bounded_below, bounded_above +real(r8), intent(in) :: lower_bound, upper_bound +type(distribution_params_type), intent(inout) :: p +real(r8), intent(out) :: quantiles(ens_size) + +real(r8) :: tail_amp_left, tail_mean_left, tail_sd_left +real(r8) :: tail_amp_right, tail_mean_right, tail_sd_right +real(r8) :: sort_ens(ens_size) +logical :: do_uniform_tail_left, do_uniform_tail_right + +call bnrh_cdf(x, ens_size, bounded_below, bounded_above, lower_bound, upper_bound, & + sort_ens, quantiles, & + tail_amp_left, tail_mean_left, tail_sd_left, do_uniform_tail_left, & + tail_amp_right, tail_mean_right, tail_sd_right, do_uniform_tail_right) + +! Store the info about this cdf in the distribution_params_type +call pack_bnrh_params(ens_size, bounded_below, bounded_above, lower_bound, upper_bound, & + do_uniform_tail_left, do_uniform_tail_right, tail_amp_left, tail_amp_right, & + tail_mean_left, tail_mean_right, tail_sd_left, tail_sd_right, sort_ens, p) + +end subroutine bnrh_cdf_params + +!----------------------------------------------------------------------- + +subroutine bnrh_cdf(x, ens_size, bounded_below, bounded_above, lower_bound, upper_bound, & + sort_x, quantiles, & + tail_amp_left, tail_mean_left, tail_sd_left, do_uniform_tail_left, & + tail_amp_right, tail_mean_right, tail_sd_right, do_uniform_tail_right) + +integer, intent(in) :: ens_size +real(r8), intent(in) :: x(ens_size) +logical, intent(in) :: bounded_below, bounded_above +real(r8), intent(in) :: lower_bound, upper_bound +real(r8), intent(out) :: sort_x(ens_size) +real(r8), intent(out) :: quantiles(ens_size) +real(r8), intent(out) :: tail_amp_left, tail_mean_left, tail_sd_left +real(r8), intent(out) :: tail_amp_right, tail_mean_right, tail_sd_right +logical, intent(out) :: do_uniform_tail_left, do_uniform_tail_right + +real(r8) :: q(ens_size) +real(r8) :: del_q, mean, bound_quantile +integer :: sort_index(ens_size), indx, i + +! Computes all information about a rank histogram cdf given the ensemble and bounds + +! Get ensemble mean and sd +call normal_mean_sd(x, ens_size, mean, tail_sd_left) +tail_sd_right = tail_sd_left + +! Don't know what to do if sd is 0; tail_sds returned are illegal value to indicate this +if(tail_sd_left <= 0.0_r8) then + tail_sd_left = -99_r8 + tail_sd_right = -99_r8 + return +endif + +! Sort. For now, don't worry about efficiency, but may need to somehow pass previous +! sorting indexes and use a sort that is faster for nearly sorted data. Profiling can guide the need +call index_sort(x, sort_index, ens_size) +sort_x = x(sort_index) + +! Fail if lower bound is larger than smallest ensemble member +if(bounded_below) then + ! Do in two ifs in case the bound is not defined + if(sort_x(1) < lower_bound) then + write(errstring, *) 'Smallest ensemble member less than lower bound', & + sort_x(1), lower_bound + call error_handler(E_ERR, 'bnrh_cdf', errstring, source) + endif +endif + +! Fail if upper bound is smaller than the largest ensemble member +if(bounded_above) then + if(sort_x(ens_size) > upper_bound) then + write(errstring, *) 'Largest ensemble member greater than upper bound', & + sort_x(ens_size), upper_bound + call error_handler(E_ERR, 'bnrh_cdf', errstring, source) + endif +endif + +! The ensemble size array q contains the sorted quantiles corresponding to the sorted ensemble sort_x +call ens_quantiles(sort_x, ens_size, & + bounded_below, bounded_above, lower_bound, upper_bound, q) +! The quantiles array has the unsorted quantiles corresponding to the unsorted input ensemble, x +do i = 1, ens_size + indx = sort_index(i) + quantiles(indx) = q(i) +end do + +! Compute the characteristics of tails + +! For unit normal, find distance from mean to where cdf is 1/(ens_size+1) (del_q_. +! Saved to avoid redundant computation for repeated calls with same ensemble size +del_q = 1.0_r8 / (ens_size + 1.0_r8) + +if(saved_ens_size /= ens_size) then + dist_for_unit_sd = inv_std_normal_cdf(del_q) + ! This will be negative, want it to be a distance so make it positive + dist_for_unit_sd = -1.0_r8 * dist_for_unit_sd + ! Keep a record of the ensemble size used to compute dist_for_unit_sd + saved_ens_size = ens_size +endif + +! Find a mean so that 1 / (ens_size + 1) probability is in outer regions +tail_mean_left = sort_x(1) + dist_for_unit_sd * tail_sd_left +tail_mean_right = sort_x(ens_size) - dist_for_unit_sd * tail_sd_right + +! If the distribution is bounded, still want 1 / (ens_size + 1) (del_q) in outer regions +! Put an amplitude term (greater than 1) in front of the tail normals +! Amplitude is 1 if there are no bounds, so start with that +tail_amp_left = 1.0_r8 +tail_amp_right = 1.0_r8 + +! Switch to uniform for cases where bound and outermost ensemble have close quantiles +! Default: not close +do_uniform_tail_left = .false. +if(bounded_below) then + ! Compute the CDF at the bounds + bound_quantile = normal_cdf(lower_bound, tail_mean_left, tail_sd_left) + ! Note that due to roundoff it is possible for del_q - quantile to be slightly negative + if((del_q - bound_quantile) / del_q < uniform_threshold) then + ! If bound and ensemble member are too close, do uniform approximation + do_uniform_tail_left = .true. + else + ! Compute the left tail amplitude + tail_amp_left = del_q / (del_q - bound_quantile); + endif +endif + +! Default: not close +do_uniform_tail_right = .false. +if(bounded_above) then + ! Compute the CDF at the bounds + bound_quantile = normal_cdf(upper_bound, tail_mean_right, tail_sd_right) + ! Note that due to roundoff it is possible for the numerator to be slightly negative + if((bound_quantile - (1.0_r8 - del_q)) / del_q < uniform_threshold) then + ! If bound and ensemble member are too close, do uniform approximation + do_uniform_tail_right = .true. + else + ! Compute the right tail amplitude + tail_amp_right = del_q / (del_q - (1.0_r8 - bound_quantile)) + endif +endif + +end subroutine bnrh_cdf + +!----------------------------------------------------------------------- + +subroutine bnrh_cdf_initialized_vector(x, num, p, quantiles) + +integer, intent(in) :: num +real(r8), intent(in) :: x(num) +type(distribution_params_type), intent(in) :: p +real(r8), intent(out) :: quantiles(num) + +real(r8) :: tail_amp_left, tail_mean_left, tail_sd_left +real(r8) :: tail_amp_right, tail_mean_right, tail_sd_right +logical :: do_uniform_tail_left, do_uniform_tail_right + +! Given the sorted ensemble (sort_ens) that defines a bnrh CDF and all the corresponding +! information about that distribution, computes the value of the CDF for a vector of num +! elsements (x) and returns those quantiles. + +! In the default filter usage, this is only used for doing the probit transform for the +! posterior observation ensemble. In this case, the size of vector x is the same as the +! ensemble size. For hybrid filter applications, the ensemble size defining the BNRH CDF +! might be different from the ensemble that needs updating, so x could have a different size. + +real(r8) :: q(p%ens_size) +integer :: i + +! Extract the required information from the distribution_params_type +call unpack_bnrh_params(p, do_uniform_tail_left, do_uniform_tail_right, & + tail_amp_left, tail_amp_right, tail_mean_left, tail_mean_right, tail_sd_left, tail_sd_right) + +! Compute the quantiles of each of the sorted ensemble members that define the BNRH distribution. +! This was all computed when the distribution was originally set up, could choose to cache that +! in the params structure for efficiency. This is only used for the single observation posterior +! so one could only save in that case removing any storage concerns. +call ens_quantiles(p%ens, p%ens_size, p%bounded_below, p%bounded_above, p%lower_bound, p%upper_bound, q) + +! Loop through the values in the x vector to compute the CDF at each one. +! This can be done vastly more efficiently with either binary searches or by first sorting the +! vector of values (x) for which the CDF needs to be computed +do i = 1, p%ens_size + ! Figure out which bin it is in + call bnrh_cdf_initialized(x(i), p%ens_size, p%ens, p%bounded_below, p%bounded_above, p%lower_bound, p%upper_bound, & + tail_amp_left, tail_mean_left, tail_sd_left, do_uniform_tail_left, & + tail_amp_right, tail_mean_right, tail_sd_right, do_uniform_tail_right, q, quantiles(i)) +end do + +end subroutine bnrh_cdf_initialized_vector + +!----------------------------------------------------------------------- + +subroutine bnrh_cdf_initialized(x, ens_size, sort_ens, bounded_below, bounded_above, & + lower_bound, upper_bound, & + tail_amp_left, tail_mean_left, tail_sd_left, do_uniform_tail_left, & + tail_amp_right, tail_mean_right, tail_sd_right, do_uniform_tail_right,& + q, quantile) + +real(r8), intent(in) :: x +integer, intent(in) :: ens_size +real(r8), intent(in) :: sort_ens(ens_size) +logical, intent(in) :: bounded_below, bounded_above +real(r8), intent(in) :: lower_bound, upper_bound +real(r8), intent(in) :: tail_amp_left, tail_mean_left, tail_sd_left +real(r8), intent(in) :: tail_amp_right, tail_mean_right, tail_sd_right +logical, intent(in) :: do_uniform_tail_left, do_uniform_tail_right +real(r8), intent(in) :: q(ens_size) +real(r8), intent(out) :: quantile + +real(r8) :: upper_q, fract, del_q, q_at_largest_ens +integer :: j + +! Quantile increment between ensemble members for bnrh +del_q = 1.0_r8 / (ens_size + 1.0_r8) + +if(x < sort_ens(1)) then + ! In the left tail + ! Do an error check to make sure ensemble member isn't outside bounds, may be redundant + if(bounded_below .and. x < lower_bound) then + write(errstring, *) 'Ensemble member less than lower bound', x, lower_bound + call error_handler(E_ERR, 'bnrh_cdf_initialized', errstring, source) + ! This error can occur due to roundoff in increment generation from BNRHF + ! See discussion in function fix_bounds. + endif + + if(do_uniform_tail_left) then + ! Uniform approximation for left tail; Note that denominator cannot be 0 but could be small + quantile = (x - lower_bound) / (sort_ens(1) - lower_bound) * del_q + else + ! It's a normal tail + if(bounded_below) then + quantile = tail_amp_left * (normal_cdf(x, tail_mean_left, tail_sd_left) - & + normal_cdf(lower_bound, tail_mean_left, tail_sd_left)) + else ! Unbounded, tail normal goes all the way down to quantile 0, amplitude is 1 + quantile = (normal_cdf(x, tail_mean_left, tail_sd_left) / & + normal_cdf(sort_ens(1), tail_mean_left, tail_sd_left)) * del_q + endif + ! Make sure it doesn't sneak past the quantile of the smallest ensemble member due to round-off + quantile = min(quantile, q(1)) + endif +elseif(x == sort_ens(1)) then + ! This takes care of cases where there are multiple bnrh values at the bdry or at first ensemble + quantile = q(1) +elseif(x > sort_ens(ens_size)) then + ! In the right tail + ! Do an error check to make sure ensemble member isn't outside bounds, may be redundant + if(bounded_above .and. x > upper_bound) then + write(errstring, *) 'Ensemble member greater than upper bound first check(see code)', x, upper_bound + call error_handler(E_ERR, 'bnrh_cdf_initialized', errstring, source) + ! This error can occur due to roundoff in increment generation from bounded BNRHF + ! See discussion in function fix_bounds + endif + + if(do_uniform_tail_right) then + ! Uniform approximation for right tail + ! The division here could be a concern. However, if sort_ens(ens_size) == upper_bound, then + ! x cannot be > sort_ens(ens_size). + quantile = ens_size *del_q + & + (x - sort_ens(ens_size)) / (upper_bound - sort_ens(ens_size)) * del_q + else + ! It's a normal tail + q_at_largest_ens = normal_cdf(sort_ens(ens_size), tail_mean_right, tail_sd_right) + ! Want to avoid quantiles exceeding 1 due to numerical issues. Do fraction of the normal part + if(bounded_above) then + upper_q = tail_amp_right * normal_cdf(upper_bound, tail_mean_right, tail_sd_right) + fract = (tail_amp_right * normal_cdf(x, tail_mean_right, tail_sd_right) - & + tail_amp_right * q_at_largest_ens) / (upper_q - tail_amp_right * q_at_largest_ens) + else + ! Normal goes all the way to infinity, amplitude is 1, q at infinity is 1 + fract = (normal_cdf(x, tail_mean_right, tail_sd_right) - q_at_largest_ens) / & + (1.0_r8 - q_at_largest_ens) + endif + + quantile = ens_size * del_q + fract * del_q + quantile = min(quantile, 1.0_r8) + endif + +else + ! In an interior bin + do j = 1, ens_size - 1 + if(x < sort_ens(j+1)) then + ! The division here could be a concern. + ! However, sort_ens(j)< x < sort_ens(j+1) so the two cannot be equal + quantile = j * del_q + & + ((x - sort_ens(j)) / (sort_ens(j+1) - sort_ens(j))) * del_q + exit + elseif(x == sort_ens(j+1)) then + quantile = q(j+1) + exit + endif + enddo +endif + +end subroutine bnrh_cdf_initialized + +!----------------------------------------------------------------------- +subroutine inv_bnrh_cdf_params(quantiles, ens_size, p, x) + +integer, intent(in) :: ens_size +real(r8), intent(in) :: quantiles(ens_size) +type(distribution_params_type), intent(inout) :: p +real(r8), intent(out) :: x(ens_size) + +real(r8) :: tail_amp_left, tail_mean_left, tail_sd_left +real(r8) :: tail_amp_right, tail_mean_right, tail_sd_right +logical :: do_uniform_tail_left, do_uniform_tail_right + +call unpack_bnrh_params(p, do_uniform_tail_left, do_uniform_tail_right, & + tail_amp_left, tail_amp_right, tail_mean_left, tail_mean_right, tail_sd_left, tail_sd_right) + +call inv_bnrh_cdf(quantiles, ens_size, p%ens, & + p%bounded_below, p%bounded_above, p%lower_bound, p%upper_bound, & + tail_amp_left, tail_mean_left, tail_sd_left, do_uniform_tail_left, & + tail_amp_right, tail_mean_right, tail_sd_right, do_uniform_tail_right, x) + +end subroutine inv_bnrh_cdf_params + +!----------------------------------------------------------------------- + +subroutine inv_bnrh_cdf(quantiles, ens_size, sort_ens, & + bounded_below, bounded_above, lower_bound, upper_bound, & + tail_amp_left, tail_mean_left, tail_sd_left, do_uniform_tail_left, & + tail_amp_right, tail_mean_right, tail_sd_right, do_uniform_tail_right, x) + +integer, intent(in) :: ens_size +real(r8), intent(in) :: quantiles(ens_size) +real(r8), intent(in) :: sort_ens(ens_size) +logical, intent(in) :: bounded_below, bounded_above +real(r8), intent(in) :: lower_bound, upper_bound +real(r8), intent(in) :: tail_amp_left, tail_mean_left, tail_sd_left +real(r8), intent(in) :: tail_amp_right, tail_mean_right, tail_sd_right +logical, intent(in) :: do_uniform_tail_left, do_uniform_tail_right +real(r8), intent(out) :: x(ens_size) + +integer :: region, i, j +real(r8) :: lower_state, upper_state, lower_mass, upper_mass, target_mass +real(r8) :: q(ens_size), curr_q, lower_q, upper_q, del_q, fract + +! Quantile increment between ensemble members for bnrh +del_q = 1.0_r8 / (ens_size + 1.0_r8) + +do i = 1, ens_size + q(i) = i * del_q +end do + +! Loop through each ensemble member to find posterior state +do i = 1, ens_size + curr_q = quantiles(i) + ! Which region is this quantile in? + ! BNRH quantiles are uniform; finding region for this quantile is trivial + region = floor(curr_q * (ens_size + 1.0_r8)) + ! Careful about numerical issues moving outside of region [0 ens_size] + if(region < 0) region = 0 + if(region > ens_size) region = ens_size + + if(region == 0) then + ! Lower tail + if(bounded_below .and. do_uniform_tail_left) then + ! Lower tail uniform + upper_state = sort_ens(1) + x(i) = lower_bound + (curr_q / q(1)) * (upper_state - lower_bound) + else + ! Find the mass at the lower bound (which could be unbounded) + if(bounded_below) then + lower_mass = tail_amp_left * & + normal_cdf(lower_bound, tail_mean_left, tail_sd_left) + else + lower_mass = 0.0_r8 + endif + ! Find the mass at the upper bound (ensemble member 1) + upper_mass = tail_amp_left * & + normal_cdf(sort_ens(1), tail_mean_left, tail_sd_left) + ! What fraction of this mass difference should we go? + fract = curr_q / q(1) + target_mass = lower_mass + fract * (upper_mass - lower_mass) + x(i) = inv_weighted_normal_cdf(tail_amp_left, tail_mean_left, & + tail_sd_left, target_mass) + endif + + elseif(region == ens_size) then + ! Upper tail + if(bounded_above .and. do_uniform_tail_right) then + ! Upper tail is uniform + lower_state = sort_ens(ens_size) + upper_state = upper_bound + x(i) = lower_state + (curr_q - q(ens_size)) * & + (upper_state - lower_state) / (1.0_r8 - q(ens_size)) + else + ! Upper tail is (bounded) normal + ! Find the mass at the upper bound (which could be unbounded) + if(bounded_above) then + upper_mass = tail_amp_right * & + normal_cdf(upper_bound, tail_mean_right, tail_sd_right) + else + upper_mass = 1.0_r8 + endif + ! Find the mass at the lower edge of the region (ensemble member n) + lower_mass = tail_amp_right * & + normal_cdf(sort_ens(ens_size), tail_mean_right, tail_sd_right) + ! What fraction of the last interval do we need to move + fract = (curr_q - q(ens_size)) / (1.0_r8 - q(ens_size)) + target_mass = lower_mass + fract * (upper_mass - lower_mass) + x(i) = inv_weighted_normal_cdf(tail_amp_right, tail_mean_right, & + tail_sd_right, target_mass) + endif + + else + ! Interior region; get the quantiles of the region boundary + lower_q = q(region) + upper_q = q(region + 1) + x(i) = sort_ens(region) + ((curr_q - lower_q) / (upper_q - lower_q)) * & + (sort_ens(region + 1) - sort_ens(region)) + endif + + ! Imprecision can lead to x being slightly out of bounds, fix it to bounds + call check_bounds(x(i), curr_q, bounded_below, lower_bound, & + bounded_above, upper_bound, 'inf_bnrh_cdf') +enddo + +end subroutine inv_bnrh_cdf + +!----------------------------------------------------------------------- + + +subroutine inv_bnrh_cdf_like(quantiles, ens_size, sort_ens, & + bounded_below, bounded_above, lower_bound, upper_bound, & + tail_amp_left, tail_mean_left, tail_sd_left, do_uniform_tail_left, & + tail_amp_right, tail_mean_right, tail_sd_right, do_uniform_tail_right, x, & + like) + +integer, intent(in) :: ens_size +real(r8), intent(in) :: quantiles(ens_size) +real(r8), intent(in) :: sort_ens(ens_size) +logical, intent(in) :: bounded_below, bounded_above +real(r8), intent(in) :: lower_bound, upper_bound +real(r8), intent(in) :: tail_amp_left, tail_mean_left, tail_sd_left +real(r8), intent(in) :: tail_amp_right, tail_mean_right, tail_sd_right +logical, intent(in) :: do_uniform_tail_left, do_uniform_tail_right +real(r8), intent(out) :: x(ens_size) +real(r8), intent(inout) :: like(ens_size) + +! This inverts the cdf which is optionally multiplied by a likelihood. + +integer :: region, i, j +real(r8) :: lower_state, upper_state, lower_mass, upper_mass, target_mass +real(r8) :: q(ens_size), curr_q, amp_adj, lower_q, upper_q, del_q, fract + +! Quantile increment between ensemble members for bnrh +del_q = 1.0_r8 / (ens_size + 1.0_r8) + +! Normalize the likelihood to have a sum of 1 +like = like / (sum(like) + like(1) / 2.0_r8 + like(ens_size) / 2.0_r8) + +! Go from left to right adjusting the quantiles through the x's +! Assume that the quantiles of the original ensemble for the BNRH are uniform +q(1) = like(1) +do i = 2, ens_size + q(i) = q(i - 1) + (like(i-1) + like(i)) / 2.0_r8 +end do + +! Temporary test to confirm posterior is a pdf +if(abs(q(ens_size) + like(ens_size) - 1.0_r8) > 1.0e-12) then + write(*, *) 'final q ', q(ens_size) + like(ens_size) + stop +endif + +! Loop through each ensemble member to find posterior state +do i = 1, ens_size + curr_q = quantiles(i) + ! Which region is this quantile in? + ! Find which region this quantile is in + ! Need to make this more efficient once it is working + ! Default is that region is the highest one; quantile(i) >= largest q + region = ens_size + do j = 1, ens_size + if(curr_q < q(j)) then + region = j - 1 + exit + endif + end do + + if(region == 0) then + ! Lower tail + if(bounded_below .and. do_uniform_tail_left) then + ! Lower tail uniform + upper_state = sort_ens(1) + x(i) = lower_bound + (curr_q / q(1)) * (upper_state - lower_bound) + else + ! Find the mass at the lower bound (which could be unbounded) + ! The amplitude is changed if there is a non-uniform likelihood + amp_adj = q(1) / del_q + if(bounded_below) then + lower_mass = amp_adj * tail_amp_left * & + normal_cdf(lower_bound, tail_mean_left, tail_sd_left) + else + lower_mass = 0.0_r8 + endif + ! Find the mass at the upper bound (ensemble member 1) + upper_mass = amp_adj * tail_amp_left * & + normal_cdf(sort_ens(1), tail_mean_left, tail_sd_left) + ! What fraction of this mass difference should we go? + fract = curr_q / q(1) + target_mass = lower_mass + fract * (upper_mass - lower_mass) + x(i) = inv_weighted_normal_cdf(amp_adj*tail_amp_left, tail_mean_left, & + tail_sd_left, target_mass) + endif + + elseif(region == ens_size) then + ! Upper tail + if(bounded_above .and. do_uniform_tail_right) then + ! Upper tail is uniform + lower_state = sort_ens(ens_size) + upper_state = upper_bound + x(i) = lower_state + (curr_q - q(ens_size)) * & + (upper_state - lower_state) / (1.0_r8 - q(ens_size)) + else + ! Upper tail is (bounded) normal + ! Find the mass at the upper bound (which could be unbounded) + amp_adj = (1.0_r8 - q(ens_size)) / del_q + if(bounded_above) then + upper_mass = amp_adj * tail_amp_right * & + normal_cdf(upper_bound, tail_mean_right, tail_sd_right) + else + upper_mass = amp_adj * 1.0_r8 + endif + ! Find the mass at the lower edge of the region (ensemble member n) + lower_mass = amp_adj * tail_amp_right * & + normal_cdf(sort_ens(ens_size), tail_mean_right, tail_sd_right) + ! What fraction of the last interval do we need to move + fract = (curr_q - q(ens_size)) / (1.0_r8 - q(ens_size)) + target_mass = lower_mass + fract * (upper_mass - lower_mass) + x(i) = inv_weighted_normal_cdf(amp_adj * tail_amp_right, tail_mean_right, & + tail_sd_right, target_mass) + endif + + else + ! Interior region; get the quantiles of the region boundary + lower_q = q(region) + upper_q = q(region + 1) + x(i) = sort_ens(region) + ((curr_q - lower_q) / (upper_q - lower_q)) * & + (sort_ens(region + 1) - sort_ens(region)) + endif + + ! Imprecision can lead to x being slightly out of bounds, fix it to bounds + call check_bounds(x(i), curr_q, bounded_below, lower_bound, & + bounded_above, upper_bound, 'inf_bnrh_cdf_like') +enddo + +end subroutine inv_bnrh_cdf_like + +!----------------------------------------------------------------------- + +subroutine ens_quantiles(sorted_ens, ens_size, bounded_below, bounded_above, & + lower_bound, upper_bound, q) + +! Given sorted ensemble which may have members identical to the bounds or may contain +! duplicates, compute the quantiles for each member in an bounded normal rh distribution + +integer, intent(in) :: ens_size +real(r8), intent(in) :: sorted_ens(ens_size) +logical, intent(in) :: bounded_below, bounded_above +real(r8), intent(in) :: lower_bound +real(r8), intent(in) :: upper_bound +real(r8), intent(out) :: q(ens_size) + +integer :: i, j, lower_dups, upper_dups, d_start, d_end, series_num +integer :: series_start(ens_size), series_end(ens_size), series_length(ens_size) + +! Get number of ensemble members that are duplicates of the lower bound +lower_dups = 0 +if(bounded_below) then + do i = 1, ens_size + if(sorted_ens(i) == lower_bound) then + lower_dups = lower_dups + 1 + else + exit + endif + end do +endif + +! Get number of ensemble members that are duplicates of the upper bound +upper_dups = 0 +if(bounded_above) then + do i = ens_size, 1, -1 + if(sorted_ens(i) == upper_bound) then + upper_dups = upper_dups + 1 + else + exit + endif + end do +endif + +! If there are duplicate ensemble members away from the boundaries need to revise quantiles +! Make sure not to count duplicates already handled at the boundaries +! Outer loop determines if a series of duplicates starts at sorted index i +d_start = lower_dups + 1 +d_end = ens_size - upper_dups + +! Get start, length, and end of each series of duplicates away from the bounds +series_num = 1 +series_start(series_num) = d_start +series_length(series_num) = 1 +do i = d_start + 1, d_end + if(sorted_ens(i) == sorted_ens(i - 1)) then + series_length(series_num) = series_length(series_num) + 1 + else + series_end(series_num) = i-1 + series_num = series_num + 1 + series_start(series_num) = i + series_length(series_num) = 1 + endif +end do + +! Off the end, finish up the last series +series_end(series_num) = d_end + +! Now get the value of the quantile for the exact ensemble members +! Start with the lower bound duplicates +do i = 1, lower_dups + q(i) = lower_dups / (2.0_r8 * (ens_size + 1.0_r8)) +end do + +! Top bound duplicates next +do i = ens_size - upper_dups + 1, ens_size + q(i) = 1.0_r8 - upper_dups / (2.0_r8 * (ens_size + 1.0_r8)) +end do + +! Do the interior series +do i = 1, series_num + do j = series_start(i), series_end(i) + q(j) = series_start(i) / (ens_size + 1.0_r8) + (series_length(i) - 1.0_r8) / (2.0_r8 * (ens_size + 1.0_r8)) + end do +end do + +end subroutine ens_quantiles + +!----------------------------------------------------------------------- + +subroutine pack_bnrh_params(ens_size, bounded_below, bounded_above, lower_bound, upper_bound, & + do_uniform_tail_left, do_uniform_tail_right, tail_amp_left, tail_amp_right, & + tail_mean_left, tail_mean_right, tail_sd_left, tail_sd_right, sort_ens, p) + +integer, intent(in) :: ens_size +logical, intent(in) :: bounded_below, bounded_above +real(r8), intent(in) :: lower_bound, upper_bound +logical, intent(in) :: do_uniform_tail_left, do_uniform_tail_right +real(r8), intent(in) :: tail_amp_left, tail_amp_right +real(r8), intent(in) :: tail_mean_left, tail_mean_right +real(r8), intent(in) :: tail_sd_left, tail_sd_right +real(r8), intent(in) :: sort_ens(ens_size) +type(distribution_params_type), intent(inout) :: p + +! Set the fixed storage parameters in the distribution_params_type +p%bounded_below = bounded_below; p%lower_bound = lower_bound +p%bounded_above = bounded_above; p%upper_bound = upper_bound +p%ens_size = ens_size + +! Allocate space needed for the parameters +allocate(p%ens(ens_size)) +allocate(p%more_params(2*4)) + +! Save the sorted bnrh ensemble values +p%ens = sort_ens + +! Store the extra information about the distribution in the more_params array +if(do_uniform_tail_left) then + p%more_params(1) = 1.0_r8 +else + p%more_params(1) = 0.0_r8 +endif +if(do_uniform_tail_right) then + p%more_params(2) = 1.0_r8 +else + p%more_params(2) = 0.0_r8 +endif + +p%more_params(3) = tail_amp_left; p%more_params(4) = tail_amp_right +p%more_params(5) = tail_mean_left; p%more_params(6) = tail_mean_right +p%more_params(7) = tail_sd_left; p%more_params(8) = tail_sd_right + +end subroutine pack_bnrh_params + +!----------------------------------------------------------------------- + +subroutine unpack_bnrh_params(p, do_uniform_tail_left, do_uniform_tail_right, & + tail_amp_left, tail_amp_right, tail_mean_left, tail_mean_right, tail_sd_left, tail_sd_right) + +! Unpack values describing the bnrh distribution from the distribution_params_type p + +type(distribution_params_type), intent(in) :: p +logical, intent(out) :: do_uniform_tail_left, do_uniform_tail_right +real(r8), intent(out) :: tail_amp_left, tail_amp_right +real(r8), intent(out) :: tail_mean_left, tail_mean_right +real(r8), intent(out) :: tail_sd_left, tail_sd_right + +! Logicals are stored as 1 for true, 0 for false +do_uniform_tail_left = p%more_params(1) > 0.5_r8 +do_uniform_tail_right = p%more_params(2) > 0.5_r8 + +tail_amp_left = p%more_params(3); tail_amp_right = p%more_params(4) +tail_mean_left = p%more_params(5); tail_mean_right = p%more_params(6) +tail_sd_left = p%more_params(7); tail_sd_right = p%more_params(8) + +end subroutine unpack_bnrh_params + +!----------------------------------------------------------------------- + +function get_bnrh_sd(p) + +real(r8) :: get_bnrh_sd +type(distribution_params_type), intent(in) :: p + +! Return the standard deviation of this distribution +get_bnrh_sd = p%more_params(7) + +end function get_bnrh_sd + +!----------------------------------------------------------------------- + + +subroutine check_bounds(x, q, bounded_below, lower_bound, & + bounded_above, upper_bound, msgstring) + +real(r8), intent(inout) :: x +real(r8), intent(in) :: q +logical, intent(in) :: bounded_below, bounded_above +real(r8), intent(in) :: lower_bound, upper_bound +character(len=*), intent(in) :: msgstring + +! Imprecision in inv_norm could lead to x(i) being out of bounds: check for now +! lower bound. Correct this and output a message. Could be numerically fixed above. +if(bounded_below) then + if(x < lower_bound) then + write(errstring, *) 'x less than lower_bound ', x, q + call error_handler(E_MSG, msgstring, errstring, source) + x = lower_bound + endif +endif + +! See comment on lower bound in previous code block +if(bounded_above) then + if(x > upper_bound) then + write(errstring, *) 'x greater than upper_bound ', x, q + call error_handler(E_MSG, msgstring, errstring, source) + x = upper_bound + endif +endif + +end subroutine check_bounds + +!----------------------------------------------------------------------- + +end module bnrh_distribution_mod diff --git a/assimilation_code/modules/assimilation/distribution_params_mod.f90 b/assimilation_code/modules/assimilation/distribution_params_mod.f90 new file mode 100644 index 0000000000..7951a7c7bd --- /dev/null +++ b/assimilation_code/modules/assimilation/distribution_params_mod.f90 @@ -0,0 +1,49 @@ +module distribution_params_mod + +! Provides data structure and tools to represent probability distribution families for DART + +use types_mod, only : r8 + +implicit none +private + +type distribution_params_type + integer :: distribution_type + logical :: bounded_below, bounded_above + real(r8) :: lower_bound, upper_bound + real(r8) :: params(2) + integer :: ens_size + real(r8), allocatable :: ens(:) + real(r8), allocatable :: more_params(:) +end type + +! Defining parameter strings for different prior distributions that can be used for probit transform +integer, parameter :: NORMAL_DISTRIBUTION = 1 +integer, parameter :: BOUNDED_NORMAL_RH_DISTRIBUTION = 2 +integer, parameter :: GAMMA_DISTRIBUTION = 3 +integer, parameter :: BETA_DISTRIBUTION = 4 +integer, parameter :: LOG_NORMAL_DISTRIBUTION = 5 +integer, parameter :: UNIFORM_DISTRIBUTION = 6 +integer, parameter :: PARTICLE_FILTER_DISTRIBUTION = 7 + +public :: distribution_params_type, deallocate_distribution_params, & + NORMAL_DISTRIBUTION, BOUNDED_NORMAL_RH_DISTRIBUTION, GAMMA_DISTRIBUTION, BETA_DISTRIBUTION, & + LOG_NORMAL_DISTRIBUTION, UNIFORM_DISTRIBUTION, PARTICLE_FILTER_DISTRIBUTION + +contains + +!---------------------------------------------------------------------- + +subroutine deallocate_distribution_params(p) + +type(distribution_params_type), intent(inout) :: p + +! Free up the allocatable storage +if(allocated(p%ens)) deallocate(p%ens) +if(allocated(p%more_params)) deallocate(p%more_params) + +end subroutine deallocate_distribution_params + +!---------------------------------------------------------------------- + +end module distribution_params_mod diff --git a/assimilation_code/modules/assimilation/filter_mod.f90 b/assimilation_code/modules/assimilation/filter_mod.f90 index 80aa020457..75d0560de6 100644 --- a/assimilation_code/modules/assimilation/filter_mod.f90 +++ b/assimilation_code/modules/assimilation/filter_mod.f90 @@ -35,7 +35,7 @@ module filter_mod set_multiple_filename_lists, find_textfile_dims use assim_model_mod, only : static_init_assim_model, get_model_size, & - end_assim_model, pert_model_copies + end_assim_model, pert_model_copies, get_state_meta_data use assim_tools_mod, only : filter_assim, set_assim_tools_trace, test_state_copies use obs_model_mod, only : move_ahead, advance_state, set_obs_model_trace @@ -86,6 +86,14 @@ module filter_mod use quality_control_mod, only : initialize_qc +use location_mod, only : location_type + +use probit_transform_mod, only : transform_to_probit, transform_from_probit + +use algorithm_info_mod, only : probit_dist_info, init_algorithm_info_mod, end_algorithm_info_mod + +use distribution_params_mod, only : distribution_params_type + !------------------------------------------------------------------------------ implicit none @@ -1140,6 +1148,9 @@ subroutine filter_main() call end_assim_model() call trace_message('After end_model call') +! deallocate qceff_table_data structures +call end_algorithm_info_mod() + call trace_message('Before ensemble and obs memory cleanup') call end_ensemble_manager(state_ens_handle) @@ -1262,6 +1273,10 @@ subroutine filter_initialize_modules_used() call static_init_assim_model() call state_vector_io_init() call initialize_qc() + +! Initialize algorothm_info_mod and read in QCF table data +call init_algorithm_info_mod() + call trace_message('After filter_initialize_module_used call') end subroutine filter_initialize_modules_used @@ -1543,6 +1558,13 @@ subroutine filter_ensemble_inflate(ens_handle, inflate_copy, inflate, ENS_MEAN_C integer, optional, intent(in) :: SPARE_PRIOR_SPREAD, ENS_SD_COPY integer :: j, group, grp_bot, grp_top, grp_size +type(location_type) :: my_state_loc +integer :: my_state_kind +type(distribution_params_type) :: dist_params +real(r8) :: probit_ens(ens_size), probit_ens_mean +logical :: bounded_below, bounded_above +real(r8) :: lower_bound, upper_bound +integer :: dist_type ! Assumes that the ensemble is copy complete call prepare_to_update_copies(ens_handle) @@ -1574,9 +1596,29 @@ subroutine filter_ensemble_inflate(ens_handle, inflate_copy, inflate, ENS_MEAN_C call error_handler(E_ERR,'filter_ensemble_inflate',msgstring,source) endif else + + ! This is an initial test of doing inflation in probit space + ! Note that this appears to work with adaptive inflation, but more research would be good + ! Probably also shouldn't be used with groups for now although it is coded to do so do j = 1, ens_handle%my_num_vars - call inflate_ens(inflate, ens_handle%copies(grp_bot:grp_top, j), & - ens_handle%copies(ENS_MEAN_COPY, j), ens_handle%copies(inflate_copy, j)) + call get_state_meta_data(ens_handle%my_vars(j), my_state_loc, my_state_kind) + + ! Need to specify what kind of prior to use for each + call probit_dist_info(my_state_kind, .true., .true., dist_type, & + bounded_below, bounded_above, lower_bound, upper_bound) + + call transform_to_probit(grp_size, ens_handle%copies(grp_bot:grp_top, j), & + dist_type, dist_params, probit_ens(1:grp_size), .false., & + bounded_below, bounded_above, lower_bound, upper_bound) + + ! Compute the ensemble mean in transformed space + probit_ens_mean = sum(probit_ens(1:grp_size)) / grp_size + ! Inflate in probit space + call inflate_ens(inflate, probit_ens(1:grp_size), probit_ens_mean, & + ens_handle%copies(inflate_copy, j)) + ! Transform back from probit space + call transform_from_probit(grp_size, probit_ens(1:grp_size), & + dist_params, ens_handle%copies(grp_bot:grp_top, j)) end do endif end do diff --git a/assimilation_code/modules/assimilation/gamma_distribution_mod.f90 b/assimilation_code/modules/assimilation/gamma_distribution_mod.f90 new file mode 100644 index 0000000000..953c90fe1f --- /dev/null +++ b/assimilation_code/modules/assimilation/gamma_distribution_mod.f90 @@ -0,0 +1,466 @@ +! DART software - Copyright UCAR. This open source software is provided +! by UCAR, "as is", without charge, subject to all terms of use at +! http://www.image.ucar.edu/DAReS/DART/DART_download + +module gamma_distribution_mod + +use types_mod, only : r8, PI, missing_r8 + +use utilities_mod, only : E_ERR, error_handler + +use normal_distribution_mod, only : normal_cdf, inv_cdf + +use distribution_params_mod, only : distribution_params_type + +use random_seq_mod, only : random_seq_type, random_uniform + +implicit none +private + +public :: gamma_cdf, inv_gamma_cdf, & + gamma_cdf_params, inv_gamma_cdf_params, & + random_gamma, gamma_pdf, test_gamma, gamma_mn_var_to_shape_scale, & + gamma_gamma_prod, gamma_shape_scale, set_gamma_params_from_ens + +character(len=512) :: errstring +character(len=*), parameter :: source = 'gamma_distribution_mod.f90' + +real(r8), parameter :: failed_value = -99.9_r8 + +contains + +!----------------------------------------------------------------------- + +subroutine test_gamma + +! This routine provides limited tests of the numerics in this module. It begins +! by comparing a handful of cases of the pdf and cdf to results from Matlab. It +! then tests the quality of the inverse cdf for a single shape/scale pair. Failing +! these tests suggests a serious problem. Passing them does not indicate that +! there are acceptable results for all possible inputs. + +real(r8) :: x, y, inv +real(r8) :: mean, variance, sd, gamma_shape, gamma_scale, max_diff +integer :: i + +! Comparative results for a handful of cases from MATLAB21a +real(r8) :: pdf_diff(7), cdf_diff(7) +real(r8) :: mshape(7) = [1.0_r8, 2.0_r8, 3.0_r8, 5.0_r8, 9.0_r8, 7.5_r8, 0.5_r8] +real(r8) :: mscale(7) = [2.0_r8, 2.0_r8, 2.0_r8, 1.0_r8, 0.5_r8, 1.0_r8, 1.0_r8] +real(r8) :: mx(7) = [1.0_r8, 2.0_r8, 3.0_r8, 4.0_r8, 5.0_r8, 6.0_r8, 7.0_r8] +! Generated by matlab gampdf(mx, mshape, mscale) +real(r8) :: mpdf(7) = [0.303265329856317_r8, 0.183939720585721_r8, 0.125510715083492_r8, & + 0.195366814813165_r8, 0.225198064298040_r8, 0.151385201555322_r8, & + 0.000194453010092_r8] +! Generated by matlab gamcdf(mx, mshape, mscale) +real(r8) :: mcdf(7) = [0.393469340287367_r8, 0.264241117657115_r8, 0.191153169461942_r8, & + 0.371163064820127_r8, 0.667180321249281_r8, 0.320970942909585_r8, & + 0.999817189367018_r8] + +! Compare to matlab +write(*, *) 'Absolute value of differences should be less than 1e-15' +do i = 1, 7 + pdf_diff(i) = gamma_pdf(mx(i), mshape(i), mscale(i)) - mpdf(i) + cdf_diff(i) = gamma_cdf(mx(i), mshape(i), mscale(i), .true., .false., 0.0_r8, missing_r8) - mcdf(i) + write(*, *) i, pdf_diff(i), cdf_diff(i) +end do + +! Input a mean and variance +mean = 10.0_r8 +sd = 1.0_r8 +variance = sd**2 + +! Get shape and scale +gamma_shape = mean**2 / variance +gamma_scale = variance / mean + +! Test the inversion of the cdf over +/- 5 standard deviations around mean +max_diff = -1.0_r8 +do i = 0, 1000 + x = mean + ((i - 500.0_r8) / 500.0_r8) * 5.0_r8 * sd + y = gamma_cdf(x, gamma_shape, gamma_scale, .true., .false., 0.0_r8, missing_r8) + inv = inv_gamma_cdf(y, gamma_shape, gamma_scale, .true., .false., 0.0_r8, missing_r8) + max_diff = max(abs(x-inv), max_diff) +end do + +write(*, *) '----------------------------' +write(*, *) 'max difference in inversion is ', max_diff +write(*, *) 'max difference should be less than 1e-11' + +end subroutine test_gamma + +!----------------------------------------------------------------------- + +function inv_gamma_cdf_params(quantile, p) result(x) + +real(r8) :: x +real(r8), intent(in) :: quantile +type(distribution_params_type), intent(in) :: p + +! Could do error checks for gamma_shape and gamma_scale values here +x = inv_cdf(quantile, gamma_cdf_params, inv_gamma_first_guess_params, p) + +end function inv_gamma_cdf_params +!----------------------------------------------------------------------- + +function inv_gamma_cdf(quantile, gamma_shape, gamma_scale, & + bounded_below, bounded_above, lower_bound, upper_bound) result(x) + +real(r8) :: x +real(r8), intent(in) :: quantile +real(r8), intent(in) :: gamma_shape +real(r8), intent(in) :: gamma_scale +logical, intent(in) :: bounded_below, bounded_above +real(r8), intent(in) :: lower_bound, upper_bound + +! Given a quantile q, finds the value of x for which the gamma cdf +! with shape and scale has approximately this quantile + +type(distribution_params_type) :: p + +! Load the p type for the generic cdf calls +p%params(1) = gamma_shape; p%params(2) = gamma_scale +p%bounded_below = bounded_below; p%bounded_above = bounded_above +p%lower_bound = lower_bound; p%upper_bound = upper_bound + +x = inv_gamma_cdf_params(quantile, p) + +end function inv_gamma_cdf + +!--------------------------------------------------------------------------- + +function gamma_pdf(x, gamma_shape, gamma_scale) + +! Returns the probability density of a gamma function with shape and scale +! at the value x + +real(r8) :: gamma_pdf +real(r8), intent(in) :: x, gamma_shape, gamma_scale + +! All inputs must be nonnegative +if(x < 0.0_r8 .or. gamma_shape < 0.0_r8 .or. gamma_scale < 0.0_r8) then + gamma_pdf = failed_value +else + gamma_pdf = x**(gamma_shape - 1.0_r8) * exp(-x / gamma_scale) / & + (gamma(gamma_shape) * gamma_scale**gamma_shape) +endif + +end function gamma_pdf + +!--------------------------------------------------------------------------- + +function gamma_cdf_params(x, p) + +real(r8) :: gamma_cdf_params +real(r8), intent(in) :: x +type(distribution_params_type), intent(in) :: p + +! A translation routine that is required to use the generic cdf optimization routine +! Extracts the appropriate information from the distribution_params_type that is needed +! for a call to the function gamma_cdf below. + +real(r8) :: gamma_shape, gamma_scale + +gamma_shape = p%params(1); gamma_scale = p%params(2) +gamma_cdf_params = gamma_cdf(x, gamma_shape, gamma_scale, & + p%bounded_below, p%bounded_above, p%lower_bound, p%upper_bound) + +end function gamma_cdf_params + +!--------------------------------------------------------------------------- + +function gamma_cdf(x, gamma_shape, gamma_scale, bounded_below, bounded_above, lower_bound, upper_bound) + +! Returns the cumulative distribution of a gamma function with shape and scale +! at the value x + +real(r8) :: gamma_cdf +real(r8), intent(in) :: x, gamma_shape, gamma_scale +logical, intent(in) :: bounded_below, bounded_above +real(r8), intent(in) :: lower_bound, upper_bound + +! All inputs must be nonnegative +if(x < 0.0_r8 .or. gamma_shape < 0.0_r8 .or. gamma_scale < 0.0_r8) then + gamma_cdf = failed_value +elseif(x == 0.0_r8) then + gamma_cdf = 0.0_r8 +else + ! Use definition as incomplete gamma ratio to gamma + gamma_cdf = gammad(x / gamma_scale, gamma_shape) +endif + +end function gamma_cdf + +!--------------------------------------------------------------------------- +function gammad (x, p) + +implicit none + +real(r8) :: gammad +real(r8), intent(in) :: x +real(r8), intent(in) :: p + +!*****************************************************************************80 +! +!! GAMMAD computes the Incomplete Gamma Integral +! +! Modified: +! +! 20 January 2008 +! +! Author: +! +! Original FORTRAN77 version by B Shea. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! B Shea, +! Algorithm AS 239: +! Chi-squared and Incomplete Gamma Integral, +! Applied Statistics, +! Volume 37, Number 3, 1988, pages 466-473. +! +! Parameters: +! +! Input, real ( kind = 8 ) X, P, the parameters of the incomplete +! gamma ratio. 0 <= X, and 0 < P. +! +! +! Output, real ( kind = 8 ) GAMMAD, the value of the incomplete +! Gamma integral. +! + +real(r8) :: a, b, c, an, arg, pn(6), rn +real(r8), parameter :: elimit = - 88.0_r8 +real(r8), parameter :: oflo = 1.0e+37_r8 +real(r8), parameter :: plimit = 1000.0_r8 +real(r8), parameter :: tol = 1.0e-14_r8 +real(r8), parameter :: xbig = 1.0e+08_r8 + +! x zero returns zero +if(x == 0.0_r8) then + gammad = 0.0_r8 +elseif(xbig < x) then + ! If X is large set GAMMAD = 1. + gammad = 1.0_r8 +elseif(plimit < p) then +! If P is large, use a normal approximation. + pn(1) = 3.0_r8 * sqrt(p) * ((x / p)**(1.0_r8 / 3.0_r8) + & + 1.0_r8 / (9.0_r8 * p) - 1.0_r8) + gammad = normal_cdf(pn(1), 0.0_r8, 1.0_r8) +elseif(x <= 1.0_r8 .or. x < p) then +! Use Pearson's series expansion. +! Original note: (Note that P is not large enough to force overflow in logAM). + arg = p * log(x) - x - log(gamma(p + 1.0_r8)) + c = 1.0_r8 + gammad = 1.0_r8 + a = p + + do + a = a + 1.0_r8 + c = c * x / a + gammad = gammad + c + if(c <= tol) exit + end do + + arg = arg + log(gammad) + + if(elimit <= arg) then + gammad = exp(arg) + else + gammad = 0.0_r8 + end if +else + ! Use a continued fraction expansion. + arg = p * log(x) - x - log(gamma(p)) + a = 1.0_r8 - p + b = a + x + 1.0_r8 + c = 0.0_r8 + pn(1) = 1.0_r8 + pn(2) = x + pn(3) = x + 1.0_r8 + pn(4) = x * b + gammad = pn(3) / pn(4) + + do + a = a + 1.0_r8 + b = b + 2.0_r8 + c = c + 1.0_r8 + an = a * c + pn(5) = b * pn(3) - an * pn(1) + pn(6) = b * pn(4) - an * pn(2) + + if (pn(6) /= 0.0_r8) then + rn = pn(5) / pn(6) + if(abs(gammad - rn) <= min(tol, tol * rn)) exit + gammad = rn + end if + + pn(1) = pn(3) + pn(2) = pn(4) + pn(3) = pn(5) + pn(4) = pn(6) + + ! Re-scale terms in continued fraction if terms are large. + if (oflo <= abs(pn(5))) pn(1:4) = pn(1:4) / oflo + + end do + + arg = arg + log(gammad) + + if (elimit <= arg) then + gammad = 1.0_r8 - exp(arg) + else + gammad = 1.0_r8 + endif +endif + +end function gammad + +!--------------------------------------------------------------------------- + +function random_gamma(r, rshape, rscale) + +! Note that this provides same qualitative functionality as a similarly named +! routine in the random_seq_mod that uses a rejection algorithm. However, once +! we have an inverse cdf function for a distribution, it is possible to generate +! random numbers by first getting a draw from a U(0, 1) and then inverting these +! quantiles to get an actual value + +type(random_seq_type), intent(inout) :: r +real(r8), intent(in) :: rshape +real(r8), intent(in) :: rscale +real(r8) :: random_gamma + +real(r8) :: quantile +if (rshape <= 0.0_r8) then + write(errstring, *) 'Shape parameter must be positive, was ', rshape + call error_handler(E_ERR, 'random_gamma', errstring, source) +endif + +if (rscale <= 0.0_r8) then + write(errstring, *) 'Scale parameter (scale=1/rate) must be positive, was ', rscale + call error_handler(E_ERR, 'random_gamma', errstring, source) +endif + +! Draw from U(0, 1) to get a quantile +quantile = random_uniform(r) +! Invert cdf to get a draw from gamma +random_gamma = inv_gamma_cdf(quantile, rshape, rscale, .true., .false., 0.0_r8, missing_r8) + +end function random_gamma + +!--------------------------------------------------------------------------- + +subroutine gamma_shape_scale(x, num, gamma_shape, gamma_scale) + +integer, intent(in) :: num +real(r8), intent(in) :: x(num) +real(r8), intent(out) :: gamma_shape +real(r8), intent(out) :: gamma_scale + +! This subroutine computes a shape and scale from a sample +! It first computes the mean and sd, then converts +! Note that this is NOT the maximum likelihood estimator from the sample +! and computing that would be an alternative method to get shape and scale + +real(r8) :: mean, variance + +mean = sum(x) / num +variance = sum((x - mean)**2) / (num - 1) + +call gamma_mn_var_to_shape_scale(mean, variance, gamma_shape, gamma_scale) + +end subroutine gamma_shape_scale + +!--------------------------------------------------------------------------- + +subroutine gamma_mn_var_to_shape_scale(mean, variance, gamma_shape, gamma_scale) + +real(r8), intent(in) :: mean, variance +real(r8), intent(out) :: gamma_shape, gamma_scale + +gamma_shape = mean**2 / variance +gamma_scale = variance / mean + +end subroutine gamma_mn_var_to_shape_scale + +!--------------------------------------------------------------------------- + +subroutine gamma_gamma_prod(prior_shape, prior_scale, like_shape, like_scale, & + post_shape, post_scale) + +real(r8), intent(in) :: prior_shape, prior_scale, like_shape, like_scale +real(r8), intent(out) :: post_shape, post_scale + +! Compute statistics of product of two gammas + +post_shape = prior_shape + like_shape - 1 +post_scale = prior_scale * like_scale / (prior_scale + like_scale) + +end subroutine gamma_gamma_prod + +!--------------------------------------------------------------------------- + +function inv_gamma_first_guess_params(quantile, p) + +real(r8) :: inv_gamma_first_guess_params +real(r8), intent(in) :: quantile +type(distribution_params_type), intent(in) :: p + +! A translation routine that is required to use the generic first_guess for +! the cdf optimization routine. +! Extracts the appropriate information from the distribution_params_type that is needed +! for a call to the function approx_inv_normal_cdf below (which is nothing). + +real(r8) :: gamma_shape, gamma_scale + +gamma_shape = p%params(1); gamma_scale = p%params(2) +inv_gamma_first_guess_params = inv_gamma_first_guess(quantile, gamma_shape, gamma_scale) + +end function inv_gamma_first_guess_params + +!--------------------------------------------------------------------------- + +function inv_gamma_first_guess(quantile, gamma_shape, gamma_scale) + +real(r8) :: inv_gamma_first_guess +real(r8), intent(in) :: quantile +real(r8), intent(in) :: gamma_shape, gamma_scale + +! Need some sort of first guess, should be smarter here +! For starters, take the mean for this shape and scale +inv_gamma_first_guess = gamma_shape * gamma_scale +! Could use info about sd to further refine mean and reduce iterations +!!!sd = sqrt(gamma_shape * gamma_scale**2) + +end function inv_gamma_first_guess + +!--------------------------------------------------------------------------- + +subroutine set_gamma_params_from_ens(ens, num, bounded_below, bounded_above, & + lower_bound, upper_bound, p) + +integer, intent(in) :: num +real(r8), intent(in) :: ens(num) +logical, intent(in) :: bounded_below, bounded_above +real(r8), intent(in) :: lower_bound, upper_bound +type(distribution_params_type), intent(inout) :: p + +real(r8) :: gamma_shape, gamma_scale + +! Set the bounds info +p%bounded_below = bounded_below; p%bounded_above = bounded_above +p%lower_bound = lower_bound; p%upper_bound = upper_bound + +! Get shape and scale +call gamma_shape_scale(ens, num, gamma_shape, gamma_scale) +p%params(1) = gamma_shape +p%params(2) = gamma_scale + +end subroutine set_gamma_params_from_ens + +!--------------------------------------------------------------------------- + +end module gamma_distribution_mod diff --git a/assimilation_code/modules/assimilation/normal_distribution_mod.f90 b/assimilation_code/modules/assimilation/normal_distribution_mod.f90 new file mode 100644 index 0000000000..6b0656c62d --- /dev/null +++ b/assimilation_code/modules/assimilation/normal_distribution_mod.f90 @@ -0,0 +1,543 @@ +! DART software - Copyright UCAR. This open source software is provided +! by UCAR, "as is", without charge, subject to all terms of use at +! http://www.image.ucar.edu/DAReS/DART/DART_download + +module normal_distribution_mod + +use types_mod, only : r8, missing_r8, digits12, PI + +use utilities_mod, only : E_ERR, E_MSG, error_handler + +use distribution_params_mod, only : distribution_params_type, NORMAL_DISTRIBUTION + +implicit none +private + +public :: normal_cdf, inv_std_normal_cdf, inv_weighted_normal_cdf, test_normal, & + normal_mean_variance, normal_mean_sd, inv_cdf, set_normal_params_from_ens + +character(len=512) :: errstring +character(len=*), parameter :: source = 'normal_distribution_mod.f90' + +! These quantiles bracket the range over which inv_std_normal_cdf functions +! The test routines are confined to this range and values outside this are +! changed to these. Approximate correpsonding standard deviations are in +! min_sd and max_sd and these are the range over which the test_normal functions. +! The max_sd is smaller in magnitude than the min_sd because the Fortran number +! model cannot represent numbers as close to 1 as it can to 0. +real(r8), parameter :: min_quantile = 0.0_r8, max_quantile = 0.999999999999999_r8 +real(r8), parameter :: min_sd = -30.0_r8, max_sd = 8.0_r8 + +contains + +!------------------------------------------------------------------------ + +subroutine test_normal + +! This routine provides limited tests of the numerics in this module. It begins +! by comparing a handful of cases of the cdf to results from Matlab. It +! then tests the quality of the inverse cdf for a single mean/sd. Failing +! these tests suggests a serious problem. Passing them does not indicate that +! there are acceptable results for all possible inputs. + +! Set number of equally spaced trials for the test F-1(F(x)) where F is the CDF. +integer, parameter :: num_trials = 10000000 + +integer :: i, j +real(r8) :: sd, quantile, inv, max_diff(16), max_q(16), max_matlab_diff + +! Comparative results for a handful of cases from MATLAB21a +real(r8) :: cdf_diff(7) +real(r8) :: mmean(7) = [0.0_r8, 1.0_r8, -1.0_r8, 0.0_r8, 0.0_r8, 0.0_r8, 0.5_r8] +real(r8) :: msd(7) = [0.5_r8, 1.0_r8, 2.0_r8, 4.0_r8, 5.0_r8, 6.0_r8, 0.25_r8] +real(r8) :: mx(7) = [0.1_r8, 0.2_r8, 0.3_r8, 0.4_r8, 0.5_r8, 0.6_r8, 0.7_r8] +! Generated by matlab normcdf(mx, mmean, msd) +real(r8) :: mcdf(7) = [0.579259709439103_r8, 0.211855398583397_r8, 0.742153889194135_r8, & + 0.539827837277029_r8, 0.539827837277029_r8, 0.539827837277029_r8, & + 0.788144601416603_r8] +! Bounds for quantile inversion differences +real(r8) :: inv_diff_bound(16) = [1e-10_r8, 1e-10_r8, 1e-10_r8, 1e-10_r8, 1e-10_r8, & + 1e-9_r8, 1e-8_r8, 1e-7_r8, 1e-7_r8, 1e-6_r8, & + 1e-5_r8, 1e-4_r8, 1e-3_r8, 1e-2_r8, 1e-1_r8, 1e-0_r8] + +! Compare to matlab +! Absolute value of differences should be less than 1e-15 +do i = 1, 7 + cdf_diff(i) = normal_cdf(mx(i), mmean(i), msd(i)) - mcdf(i) +end do +max_matlab_diff = maxval(abs(cdf_diff)) +if(max_matlab_diff > 1.0e-15_r8) then + write(*, *) 'WARNING: Difference from Matlab baseline is too large ', max_matlab_diff +else + write(*, *) 'Agreement with Matlab baseline is okay: max diff is < 1e-15 ', max_matlab_diff +endif + +! Keep track of differences as function of quantile +max_diff = 0.0_r8 +do j = 1, 16 + max_q(j) = 1.0_r8 - 0.1**j +enddo + +! Test the inversion of the cdf over +/- 30 standard deviations around mean +do i = 1, num_trials + 1 + sd = min_sd + (i - 1.0_r8) * (max_sd - min_sd) / num_trials + quantile = normal_cdf(sd, 0.0_r8, 1.0_r8) + inv = inv_std_normal_cdf(quantile) + do j = 1, 16 + if(quantile < max_q(j)) then + max_diff(j) = max(abs(sd-inv), max_diff(j)) + endif + enddo +end do + +do j = 1, 16 + if(max_diff(j) > inv_diff_bound(j)) then + write(*, *) 'WARNING: Max inversion diff ', max_diff(j), ' > bound ', inv_diff_bound(j), & + 'for quantiles < ', max_q(j) + else + write(*, *) 'Max inversion diff ', max_diff(j), ' OK, bound ', inv_diff_bound(j), & + 'for quantiles < ', max_q(j) + endif +end do + +end subroutine test_normal + +!------------------------------------------------------------------------ + +function normal_cdf_params(x, p) + +real(r8) :: normal_cdf_params +real(r8), intent(in) :: x +type(distribution_params_type), intent(in) :: p + +! A translation routine that is required to use the generic cdf optimization routine +! Extracts the appropriate information from the distribution_params_type that is needed +! for a call to the function normal_cdf below. + +real(r8) :: mean, sd + +mean = p%params(1); sd = p%params(2) +normal_cdf_params = normal_cdf(x, mean, sd) + +end function normal_cdf_params + +!------------------------------------------------------------------------ + +function normal_cdf(x_in, mean, sd) + +! Approximate cumulative distribution function for normal + +real(r8) :: normal_cdf +real(r8), intent(in) :: x_in +real(r8), intent(in) :: mean, sd + +real(digits12) :: nx + +! Convert to a standard normal +nx = (x_in - mean) / sd + +if(nx < 0.0_digits12) then + normal_cdf = 0.5_digits12 * erfc(-nx / sqrt(2.0_digits12)) +else + normal_cdf = 0.5_digits12 * (1.0_digits12 + erf(nx / sqrt(2.0_digits12))) +endif + +end function normal_cdf + +!------------------------------------------------------------------------ + +function inv_weighted_normal_cdf(alpha, mean, sd, q) result(x) + +! Find the value of x for which the cdf of a N(mean, sd) multiplied times +! alpha has value q. + +real(r8) :: x +real(r8), intent(in) :: alpha, mean, sd, q + +real(r8) :: normalized_q + +! VARIABLES THROUGHOUT NEED TO SWITCH TO DIGITS_12 + +! Can search in a standard normal, then multiply by sd at end and add mean +! Divide q by alpha to get the right place for weighted normal +normalized_q = q / alpha + +! Find spot in standard normal +x = inv_std_normal_cdf(normalized_q) + +! Add in the mean and normalize by sd +x = mean + x * sd + +end function inv_weighted_normal_cdf + + +!------------------------------------------------------------------------ + +function approx_inv_normal_cdf_params(quantile, p) + +real(r8) :: approx_inv_normal_cdf_params +real(r8), intent(in) :: quantile +type(distribution_params_type), intent(in) :: p + +! A translation routine that is required to use the generic first_guess for +! the cdf optimization routine. +! Extracts the appropriate information from the distribution_params_type that is needed +! for a call to the function approx_inv_normal_cdf below (which is nothing). + +approx_inv_normal_cdf_params = approx_inv_normal_cdf(quantile) + +end function approx_inv_normal_cdf_params + +!------------------------------------------------------------------------ + +function approx_inv_normal_cdf(quantile_in) result(x) + +real(r8) :: x +real(r8), intent(in) :: quantile_in + +! This is used to get a good first guess for the search in inv_std_normal_cdf +! The params argument is not needed here but is required for consistency & +! with other distributions + +! normal inverse +! translate from http://home.online.no/~pjacklam/notes/invnorm +! a routine written by john herrero + +real(r8) :: quantile +real(r8) :: quantile_low,quantile_high +real(r8) :: a1,a2,a3,a4,a5,a6 +real(r8) :: b1,b2,b3,b4,b5 +real(r8) :: c1,c2,c3,c4,c5,c6 +real(r8) :: d1,d2,d3,d4 +real(r8) :: r, s + +! Truncate out of range quantiles, converts them to smallest positive number or largest number <1 +! This solution is stable, but may lead to underflows being thrown. May want to +! think of a better solution. +quantile = quantile_in +if(quantile <= 0.0_r8) quantile = tiny(quantile_in) +if(quantile >= 1.0_r8) quantile = nearest(1.0_r8, -1.0_r8) + +a1 = -39.69683028665376_digits12 +a2 = 220.9460984245205_digits12 +a3 = -275.9285104469687_digits12 +a4 = 138.357751867269_digits12 +a5 = -30.66479806614716_digits12 +a6 = 2.506628277459239_digits12 +b1 = -54.4760987982241_digits12 +b2 = 161.5858368580409_digits12 +b3 = -155.6989798598866_digits12 +b4 = 66.80131188771972_digits12 +b5 = -13.28068155288572_digits12 +c1 = -0.007784894002430293_digits12 +c2 = -0.3223964580411365_digits12 +c3 = -2.400758277161838_digits12 +c4 = -2.549732539343734_digits12 +c5 = 4.374664141464968_digits12 +c6 = 2.938163982698783_digits12 +d1 = 0.007784695709041462_digits12 +d2 = 0.3224671290700398_digits12 +d3 = 2.445134137142996_digits12 +d4 = 3.754408661907416_digits12 +quantile_low = 0.02425_digits12 +quantile_high = 1_digits12 - quantile_low +! Split into an inner and two outer regions which have separate fits +if(quantile < quantile_low) then + s = sqrt(-2.0_digits12 * log(quantile)) + x = (((((c1*s + c2)*s + c3)*s + c4)*s + c5)*s + c6) / & + ((((d1*s + d2)*s + d3)*s + d4)*s + 1.0_digits12) +else if(quantile > quantile_high) then + s = sqrt(-2.0_digits12 * log(1.0_digits12 - quantile)) + x = -(((((c1*s + c2)*s + c3)*s + c4)*s + c5)*s + c6) / & + ((((d1*s + d2)*s + d3)*s + d4)*s + 1.0_digits12) +else + s = quantile - 0.5_digits12 + r = s*s + x = (((((a1*r + a2)*r + a3)*r + a4)*r + a5)*r + a6)*s / & + (((((b1*r + b2)*r + b3)*r + b4)*r + b5)*r + 1.0_digits12) +endif + +end function approx_inv_normal_cdf + +!------------------------------------------------------------------------ + +function inv_std_normal_cdf_params(quantile, p) result(x) + +real(r8) :: x +real(r8), intent(in) :: quantile +type(distribution_params_type), intent(in) :: p + +x = inv_cdf(quantile, normal_cdf_params, approx_inv_normal_cdf_params, p) + + +end function inv_std_normal_cdf_params + +!------------------------------------------------------------------------ + +function inv_std_normal_cdf(quantile) result(x) + +real(r8) :: x +real(r8), intent(in) :: quantile + +! This naive Newton method is much more accurate than approx_inv_normal_cdf, especially +! for quantile values less than 0.5. + +! Given a quantile q, finds the value of x for which the standard normal cdf +! has approximately this quantile + +! Where should the stupid p type come from +type(distribution_params_type) :: p +real(r8) :: mean, sd + +! Set the mean and sd to 0 and 1 for standard normal +mean = 0.0_r8; sd = 1.0_r8 +p%params(1) = mean; p%params(2) = sd + +x = inv_std_normal_cdf_params(quantile, p) + +end function inv_std_normal_cdf + +!------------------------------------------------------------------------ + +function inv_cdf(quantile_in, cdf, first_guess, p) result(x) + +interface + function cdf(x, p) + use types_mod, only : r8 + use distribution_params_mod, only : distribution_params_type + real(r8) :: cdf + real(r8), intent(in) :: x + type(distribution_params_type), intent(in) :: p + end function +end interface + +interface + function first_guess(quantile, p) + use types_mod, only : r8 + use distribution_params_mod, only : distribution_params_type + real(r8) :: first_guess + real(r8), intent(in) :: quantile + type(distribution_params_type), intent(in) :: p + end function +end interface + +real(r8) :: x +real(r8), intent(in) :: quantile_in +type(distribution_params_type), intent(in) :: p + +! This naive Newton method is much more accurate than approx_inv_normal_cdf, especially +! for quantile values less than 0.5. + +! Given a quantile q, finds the value of x for which the standard normal cdf +! has approximately this quantile + +! Limit on the total iterations; Increasing this does not change any of the results +! that do not converge for the test_normal call on gfortran. +integer, parameter :: max_iterations = 50 + +! Limit on number of times to halve the increment; No deep thought. +integer, parameter :: max_half_iterations = 25 + +real(r8) :: quantile +real(r8) :: reltol, dq_dx, delta +real(r8) :: x_guess, q_guess, x_new, q_new, del_x, del_q, del_q_old, q_old +integer :: iter, j + +real(r8) :: lower_bound, upper_bound +logical :: bounded_below, bounded_above + +! Extract the required information from the p type +bounded_below = p%bounded_below; bounded_above = p%bounded_above +lower_bound = p%lower_bound; upper_bound = p%upper_bound + +quantile = quantile_in + +! Do a test for illegal values on the quantile +if(quantile < 0.0_r8 .or. quantile > 1.0_r8) then + ! Need an error message + write(errstring, *) 'Illegal Quantile input', quantile + call error_handler(E_ERR, 'inv_cdf', errstring, source) +endif + +! If the distribution is bounded, quantiles at the limits have values at the bounds +if(bounded_below .and. quantile == 0.0_r8) then + x = lower_bound + return +endif +if(bounded_above .and. quantile == 1.0_r8) then + x = upper_bound + return +endif + +! If input quantiles are outside the numerically supported range, move them to the extremes +quantile = min(quantile, max_quantile) +! code tests stably for many distributions with min_quantile of 0.0, could remove this +quantile = max(quantile, min_quantile) + +! Get first guess from functional approximation +x_guess = first_guess(quantile, p) + +! Evaluate the cdf +q_guess = cdf(x_guess, p) + +del_q = q_guess - quantile + +! Iterations of the Newton method to approximate the root +do iter = 1, max_iterations + ! Analytically, the PDF is derivative of CDF but this can be numerically inaccurate for extreme values + ! Use numerical derivatives of the CDF to get more accurate inversion + ! These values for the delta for the approximation work with Gfortran + delta = max(1e-8_r8, 1e-8_r8 * abs(x_guess)) + dq_dx = (cdf(x_guess + delta, p) - cdf(x_guess - delta, p)) / (2.0_r8 * delta) + ! Derivative of 0 means we're not going anywhere else + if(dq_dx <= 0.0_r8) then + x = x_guess + return + endif + + ! Linear approximation for how far to move in x + del_x = del_q / dq_dx + x_new = x_guess - del_x + + ! Look for convergence; If the change in x is smaller than approximate precision + reltol = (epsilon(x_guess))**(0.75_r8) + if(abs(del_x) <= reltol) then + x = x_new + return + endif + + ! If we've gone too far, the new error will be bigger than the old; + ! Repeatedly half the distance until this is rectified + del_q_old = del_q + q_new = cdf(x_new, p) + do j = 1, max_half_iterations + del_q = q_new - quantile + if (abs(del_q) < abs(del_q_old)) then + exit + endif + q_old = q_new + x_new = (x_guess + x_new)/2.0_r8 + q_new = cdf(x_new, p) + ! If q isn't changing, no point in continuing + if(q_old == q_new) exit + + end do + + x_guess = x_new +end do + +! For now, have switched a failed convergence to return the latest guess +! This has implications for stability of probit algorithms that require further study +! Not currently happening for any of the test cases on gfortran +x = x_new +write(errstring, *) 'Failed to converge for quantile ', quantile +call error_handler(E_MSG, 'inv_cdf', errstring, source) +!!!call error_handler(E_ERR, 'inv_cdf', errstring, source) + +end function inv_cdf + +!------------------------------------------------------------------------ + +function std_normal_pdf(x) + +! Pdf of standard normal evaluated at x +real(r8) :: std_normal_pdf +real(r8), intent(in) :: x + +std_normal_pdf = exp(-0.5_r8 * x**2) / (sqrt(2.0_r8 * PI)) + +end function std_normal_pdf + +!------------------------------------------------------------------------ + +subroutine normal_mean_variance(x, num, mean, variance) + +integer, intent(in) :: num +real(r8), intent(in) :: x(num) +real(r8), intent(out) :: mean +real(r8), intent(out) :: variance + +mean = sum(x) / num +variance = sum((x - mean)**2) / (num - 1) + +end subroutine normal_mean_variance + +!------------------------------------------------------------------------ + +subroutine normal_mean_sd(x, num, mean, sd) + +integer, intent(in) :: num +real(r8), intent(in) :: x(num) +real(r8), intent(out) :: mean +real(r8), intent(out) :: sd + +mean = sum(x) / num +sd = sqrt(sum((x - mean)**2) / (num - 1)) + +end subroutine normal_mean_sd + +!------------------------------------------------------------------------ + +subroutine set_normal_params_from_ens(ens, num, p) + +integer, intent(in) :: num +real(r8), intent(in) :: ens(num) +type(distribution_params_type), intent(inout) :: p + +! Set up the description of the normal distribution defined by the ensemble +p%distribution_type = NORMAL_DISTRIBUTION + +! The two meaningful params are the mean and standard deviation +call normal_mean_sd(ens, num, p%params(1), p%params(2)) + + +end subroutine set_normal_params_from_ens + +!------------------------------------------------------------------------ +subroutine inv_cdf_quadrature_like(quantiles, ens, likelihood, ens_size, cdf, p, x_out) + +interface + function cdf(x, p) + use types_mod, only : r8 + use distribution_params_mod, only : distribution_params_type + real(r8) :: cdf + real(r8), intent(in) :: x + type(distribution_params_type), intent(in) :: p + end function +end interface + +integer, intent(in) :: ens_size +real(r8), intent(in) :: quantiles(ens_size) +real(r8), intent(in) :: ens(ens_size) +real(r8), intent(in) :: likelihood(ens_size) +type(distribution_params_type), intent(in) :: p +real(r8), intent(out) :: x_out(ens_size) + +integer :: i +real(r8) :: quad_like(ens_size + 1), q_ens(ens_size + 1) + +! Assume that the quantiles and the corresponding ens are sorted + +! Get the likelihood for each of the ens_size + 1 intervals +do i = 2, ens_size + quad_like(i) = (likelihood(i - 1) + likelihood(i)) / 2.0_r8 +end do +quad_like(1) = likelihood(1) +quad_like(ens_size + 1) = likelihood(ens_size) + +! Compute the quantiles at the ensemble boundaries for the posterior +q_ens(1) = quad_like(1) * quantiles(1) +do i = 2, ens_size + q_ens(i) = q_ens(i - 1) + quad_like(i) * (quantiles(i) - quantiles(i - 1)) +end do +q_ens(ens_size + 1) = q_ens(ens_size) + & + quad_like(ens_size + 1) * (1.0_r8 - quantiles(ens_size)) + +! Normalize so that this is a posterior cdf +q_ens = q_ens / q_ens(ens_size + 1) + +end subroutine inv_cdf_quadrature_like + +!------------------------------------------------------------------------ + +end module normal_distribution_mod diff --git a/assimilation_code/modules/assimilation/probit_transform_mod.f90 b/assimilation_code/modules/assimilation/probit_transform_mod.f90 new file mode 100644 index 0000000000..f0ea534a27 --- /dev/null +++ b/assimilation_code/modules/assimilation/probit_transform_mod.f90 @@ -0,0 +1,733 @@ +! DART software - Copyright UCAR. This open source software is provided +! by UCAR, "as is", without charge, subject to all terms of use at +! http://www.image.ucar.edu/DAReS/DART/DART_download + +! A variety of PDFs, CDFs, quantile functions and other tools for working with distributions +! to implement quantile conserving filters in observation space and regression in quantile space. + +module probit_transform_mod + +use types_mod, only : r8, missing_r8 + +use sort_mod, only : index_sort + +use utilities_mod, only : E_ERR, error_handler, do_nml_file, do_nml_term, nmlfileunit, & + find_namelist_in_file, check_namelist_read + +use distribution_params_mod, only : distribution_params_type, deallocate_distribution_params, & + NORMAL_DISTRIBUTION, BOUNDED_NORMAL_RH_DISTRIBUTION, & + GAMMA_DISTRIBUTION, BETA_DISTRIBUTION, & + LOG_NORMAL_DISTRIBUTION, UNIFORM_DISTRIBUTION, & + PARTICLE_FILTER_DISTRIBUTION + +use normal_distribution_mod, only : normal_cdf, inv_std_normal_cdf + +use gamma_distribution_mod, only : gamma_cdf_params, inv_gamma_cdf_params, & + set_gamma_params_from_ens + +use beta_distribution_mod, only : beta_cdf_params, inv_beta_cdf_params, & + set_beta_params_from_ens + +use bnrh_distribution_mod, only : bnrh_cdf_initialized_vector, bnrh_cdf_params, & + inv_bnrh_cdf_params, get_bnrh_sd + +implicit none +private + +public :: transform_to_probit, transform_from_probit, transform_all_to_probit, & + transform_all_from_probit + +character(len=512) :: errstring +character(len=*), parameter :: source = 'probit_transform_mod.f90' + +! Global to indicate module has been initialized +logical :: module_initialized = .false. + +! Namelist with default value +! Logical to fix bounds violations for bounded_normal_rh +logical :: fix_bound_violations = .false. +! Should we use a logit transform instead of the default probit transform +logical :: use_logit_instead_of_probit = .false. +! Set to true to do a check of the probit to/from transforms for inverse accuracy +logical :: do_inverse_check = .false. + +namelist /probit_transform_nml/ fix_bound_violations, & + use_logit_instead_of_probit, do_inverse_check + +contains + +!------------------------------------------------------------------------ + +subroutine transform_all_to_probit(ens_size, num_vars, state_ens, distribution_type, & + p, probit_ens, use_input_p, bounded_below, bounded_above, lower_bound, upper_bound) + +integer, intent(in) :: ens_size +integer, intent(in) :: num_vars +real(r8), intent(in) :: state_ens(:, :) +integer, intent(in) :: distribution_type(num_vars) +type(distribution_params_type), intent(inout) :: p(num_vars) +real(r8), intent(out) :: probit_ens(:, :) +logical, intent(in) :: use_input_p +logical, intent(in) :: bounded_below, bounded_above +real(r8), intent(in) :: lower_bound, upper_bound + + +! NOTE THAT WILL MAKE HELEN CRAZY: THIS WORKS WITH THE INPUT CALLING ARGUMENTS FOR STATE_ENS AND +! PROBIT_ENS BEING THE SAME. A TEMP IS USED TO AVOID OVERWRITING ISSUES. IS THIS YUCKY? + +! Note that the input and output arrays may have extra copies (first subscript). Passing sections of a +! leading index could be inefficient for time and storage, so avoiding that for now. + +! Assumes that the bounds are the same for any variables that are BNRH for now +! The bounds variables are not used for the normal case or the case where the input p is used + +integer :: i +real(r8) :: temp_ens(ens_size) + +do i = 1, num_vars + call transform_to_probit(ens_size, state_ens(1:ens_size, i), distribution_type(i), & + p(i), temp_ens, use_input_p, bounded_below, bounded_above, lower_bound, upper_bound) + probit_ens(1:ens_size, i) = temp_ens +end do + +end subroutine transform_all_to_probit + +!------------------------------------------------------------------------ + +subroutine transform_to_probit(ens_size, state_ens_in, distribution_type, p, & + probit_ens, use_input_p, bounded_below, bounded_above, lower_bound, upper_bound) + +integer, intent(in) :: ens_size +real(r8), intent(in) :: state_ens_in(ens_size) +integer, intent(in) :: distribution_type +type(distribution_params_type), intent(inout) :: p +real(r8), intent(out) :: probit_ens(ens_size) +logical, intent(in) :: use_input_p +logical, intent(in) :: bounded_below, bounded_above +real(r8), intent(in) :: lower_bound, upper_bound + +real(r8) :: state_ens(ens_size) +real(r8) :: probit_ens_temp(ens_size), state_ens_temp(ens_size), diff(ens_size) +type(distribution_params_type) :: p_temp +integer :: i + +! If not initialized, read in the namelist +if(.not. module_initialized) call initialize_probit_transform + +! Fix bounds violations if requested +if(fix_bound_violations) then + do i = 1, ens_size + state_ens(i) = fix_bounds(state_ens_in(i), bounded_below, bounded_above, & + lower_bound, upper_bound) + end do +else + state_ens = state_ens_in +endif + +! Set the type of the distribution in the parameters defined type +p%distribution_type = distribution_type + +if(p%distribution_type == NORMAL_DISTRIBUTION) then + ! No transformation is done for a normal + probit_ens = state_ens +elseif(p%distribution_type == LOG_NORMAL_DISTRIBUTION) then + call to_probit_log_normal(ens_size, state_ens, probit_ens) +elseif(p%distribution_type == UNIFORM_DISTRIBUTION) then + call to_probit_uniform(ens_size, state_ens, p, probit_ens, use_input_p, lower_bound, upper_bound) +elseif(p%distribution_type == GAMMA_DISTRIBUTION) then + call to_probit_gamma(ens_size, state_ens, p, probit_ens, use_input_p, & + bounded_below, bounded_above, lower_bound, upper_bound) +elseif(p%distribution_type == BETA_DISTRIBUTION) then + call to_probit_beta(ens_size, state_ens, p, probit_ens, use_input_p, & + lower_bound, upper_bound) +elseif(p%distribution_type == BOUNDED_NORMAL_RH_DISTRIBUTION) then + call to_probit_bounded_normal_rh(ens_size, state_ens, p, probit_ens, & + use_input_p, bounded_below, bounded_above, lower_bound, upper_bound) + +!---------------------------------------------------------------------------------- +! The following code block tests that the to/from probit calls are nearly inverse +! for all of the calls made during an assimilation + if(do_inverse_check) then + if(.not. use_input_p) then + call to_probit_bounded_normal_rh(ens_size, state_ens, p_temp, probit_ens_temp, & + use_input_p, bounded_below, bounded_above, lower_bound, upper_bound) + call from_probit_bounded_normal_rh(ens_size, probit_ens_temp, p_temp, state_ens_temp) + diff = state_ens - state_ens_temp + if(abs(maxval(diff)) > 1.0e-8_r8) then + write(*, *) 'Maximum allowed value of probit to/from difference exceeded' + write(*, *) 'Location of minimum ensemble member ', minloc(state_ens) + write(*, *) 'Location of maximum ensemble member ', maxloc(state_ens) + do i = 1, ens_size + write(*, *) i, state_ens(i), state_ens_temp(i), diff(i) + enddo + stop + endif + endif + + if(use_input_p) then + call to_probit_bounded_normal_rh(ens_size, state_ens, p, probit_ens_temp, & + use_input_p, bounded_below, bounded_above, lower_bound, upper_bound) + call from_probit_bounded_normal_rh(ens_size, probit_ens_temp, p, state_ens_temp) + diff = state_ens - state_ens_temp + if(abs(maxval(diff)) > 1.0e-8_r8) then + write(*, *) 'Maximum allowed value of probit to/from difference for input p exceeded' + write(*, *) 'Location of minimum ensemble member ', minloc(state_ens) + write(*, *) 'Location of maximum ensemble member ', maxloc(state_ens) + do i = 1, ens_size + write(*, *) i, state_ens(i), state_ens_temp(i), diff(i) + enddo + stop + endif + + endif + endif +!---------------------------------------------------------------------------------- + + +!!!elseif(p%distribution_type == PARTICLE_FILTER_DISTRIBUTION) then + !!!call to_probit_particle(ens_size, state_ens, p, probit_ens, use_input_p, & + !!!bounded_below, bounded_above, lower_bound, upper_bound) +else + write(errstring, *) 'Illegal distribution type', p%distribution_type + call error_handler(E_ERR, 'transform_to_probit', errstring, source) +endif + +end subroutine transform_to_probit + +!------------------------------------------------------------------------ + +subroutine to_probit_log_normal(ens_size, state_ens, probit_ens) + +integer, intent(in) :: ens_size +real(r8), intent(in) :: state_ens(ens_size) +real(r8), intent(out) :: probit_ens(ens_size) + +! Taking the logarithm leads directly to a normal distribution +! This normal may not be standard normal, but needs no further adjustment like +! the regular normal +probit_ens = log(state_ens) + +end subroutine to_probit_log_normal + +!------------------------------------------------------------------------ + +subroutine to_probit_uniform(ens_size, state_ens, p, probit_ens, use_input_p, & + lower_bound_in, upper_bound_in) + +integer, intent(in) :: ens_size +real(r8), intent(in) :: state_ens(ens_size) +type(distribution_params_type), intent(inout) :: p +real(r8), intent(out) :: probit_ens(ens_size) +logical, intent(in) :: use_input_p +real(r8), intent(in) :: lower_bound_in, upper_bound_in + +real(r8) :: lower_bound, upper_bound, d_range, quantile +integer :: i + +! There is no distribution_mod for uniform at the moment so params setup is done here +if(use_input_p) then + lower_bound = p%lower_bound + upper_bound = p%upper_bound +else + lower_bound = lower_bound_in + upper_bound = upper_bound_in + ! Save the bounds in the distribution_params_type + p%lower_bound = lower_bound + p%upper_bound = upper_bound +endif + +d_range = upper_bound - lower_bound +do i = 1, ens_size + ! Transform to quantile; U(lower_bound, upper_bound) to U(0, 1) + quantile = (state_ens(i) - lower_bound) / d_range + ! Transform to probit/logit space + probit_ens(i) = probit_or_logit_transform(quantile) +end do + +end subroutine to_probit_uniform + +!------------------------------------------------------------------------ + +subroutine to_probit_gamma(ens_size, state_ens, p, probit_ens, use_input_p, & + bounded_below, bounded_above, lower_bound, upper_bound) + +integer, intent(in) :: ens_size +real(r8), intent(in) :: state_ens(ens_size) +type(distribution_params_type), intent(inout) :: p +real(r8), intent(out) :: probit_ens(ens_size) +logical, intent(in) :: use_input_p +logical, intent(in) :: bounded_below, bounded_above +real(r8), intent(in) :: lower_bound, upper_bound + +! Probit transform for gamma. +real(r8) :: quantile +integer :: i + +! Bounds other than a lower bound at 0 not yet implemented for gamma distribution + +! Get the parameters for this distribution if not already available +if(.not. use_input_p) then + + ! In full generality, gamma must be bounded either below or above + if(.not. (bounded_below .neqv. bounded_above)) then + errstring = 'Gamma distribution requires either bounded above or below to be true' + call error_handler(E_ERR, 'to_probit_gamma', errstring, source) + endif + + call set_gamma_params_from_ens(state_ens, ens_size, bounded_below, bounded_above, & + lower_bound, upper_bound, p) +endif + +do i = 1, ens_size + ! First, get the quantile for this ensemble member + quantile = gamma_cdf_params(state_ens(i), p) + ! Transform to probit space + probit_ens(i) = probit_or_logit_transform(quantile) +end do + +end subroutine to_probit_gamma + +!------------------------------------------------------------------------ + +subroutine to_probit_beta(ens_size, state_ens, p, probit_ens, use_input_p, & + lower_bound, upper_bound) + +integer, intent(in) :: ens_size +real(r8), intent(in) :: state_ens(ens_size) +type(distribution_params_type), intent(inout) :: p +real(r8), intent(out) :: probit_ens(ens_size) +logical, intent(in) :: use_input_p +real(r8), intent(in) :: lower_bound, upper_bound + +! Probit transform for beta. +real(r8) :: quantile +integer :: i + +! Get the parameters for this distribution if not already available +if(.not. use_input_p) then + call set_beta_params_from_ens(state_ens, ens_size, lower_bound, upper_bound, p) +endif + +do i = 1, ens_size + ! First, get the quantile for this ensemble member + quantile = beta_cdf_params(state_ens(i), p) + ! Transform to probit/logit space + probit_ens(i) = probit_or_logit_transform(quantile) +end do + +end subroutine to_probit_beta + +!------------------------------------------------------------------------ + +subroutine to_probit_bounded_normal_rh(ens_size, state_ens, p, probit_ens, & + use_input_p, bounded_below, bounded_above, lower_bound, upper_bound) + +! Note that this is just for transforming back and forth, not for doing the RHF observation update +! This means that we know a prior that the quantiles associated with the initial ensemble are +! uniformly spaced which can be used to simplify transforming. + +! How to handle identical ensemble members is an open question for now. This is also a problem +! for ensemble members that are identical to one of the bounds. + +integer, intent(in) :: ens_size +real(r8), intent(in) :: state_ens(ens_size) +type(distribution_params_type), intent(inout) :: p +real(r8), intent(out) :: probit_ens(ens_size) +logical, intent(in) :: use_input_p +logical, intent(in) :: bounded_below, bounded_above +real(r8), intent(in) :: lower_bound, upper_bound + +! Probit transform for bounded normal rh. +integer :: i +real(r8) :: quantile(ens_size) + +if(use_input_p) then + ! Do not know what to do if sd of original ensemble is 0 (or small, work on this later) + if(get_bnrh_sd(p) <= 0.0_r8) then + ! Just return the original ensemble + probit_ens = state_ens + return + endif + + ! Get the quantiles for each of the ensemble members in a BNRH distribution + call bnrh_cdf_initialized_vector(state_ens, ens_size, p, quantile) + +else + ! Get all the info about the rank histogram cdf + call bnrh_cdf_params(state_ens, ens_size, bounded_below, bounded_above, & + lower_bound, upper_bound, p, quantile) + + ! Do not know what to do if sd is 0 (or small, work on this later) + if(get_bnrh_sd(p) <= 0.0_r8) then + ! Just return the original ensemble + probit_ens = state_ens + return + endif + +endif + +! Transform the quantiles to probit space +do i = 1, ens_size + probit_ens(i) = probit_or_logit_transform(quantile(i)) +end do + +end subroutine to_probit_bounded_normal_rh + +!------------------------------------------------------------------------ + +!!!subroutine to_probit_particle(ens_size, state_ens, p, probit_ens, & + !!!use_input_p, bounded_below_in, bounded_above_in, lower_bound_in, upper_bound_in) +!!! +!!!! Doing a particle filter. Quantiles are (2i-1) / 2n +!!! +!!!integer, intent(in) :: ens_size +!!!real(r8), intent(in) :: state_ens(ens_size) +!!!type(distribution_params_type), intent(inout) :: p +!!!real(r8), intent(out) :: probit_ens(ens_size) +!!!logical, intent(in) :: use_input_p +!!!logical, intent(in) :: bounded_below_in, bounded_above_in +!!!real(r8), intent(in) :: lower_bound_in, upper_bound_in +!!! +!!!integer :: i, j, indx +!!!integer :: ens_index(ens_size) +!!!real(r8) :: quantile +!!! +!!!! This should fail if any of the input states are not the same as one of the +!!!! original ensemble states when use_input_p is false. +!!!if(use_input_p) then + !!!! The particles are available from a previous call + !!!! The input member gets the same quantile as the corresponding member from the previous call + !!!! This can be done vastly more efficiently with either binary searches or by first sorting the + !!!! incoming state_ens so that the lower bound for starting the search is updated with each ensemble member + !!! + !!!do i = 1, ens_size + !!!! Loop through the previous ensemble members + !!!quantile = -99_r8 + !!!do j = 1, ens_size + !!!! Is exact equivalence a problem here? + !!!if(state_ens(i) == p%params(j)) then + !!!quantile = 2*(j-1) / (2*ens_size) + !!!exit + !!!endif + !!!! Test failed to find a match + !!!if(quantile < 0.0_r8) then + !!!write(errstring, *) 'Unable to find prior for use_input_p', state_ens(i) + !!!call error_handler(E_ERR, 'to_probit_particle', errstring, source) + !!!endif + !!!! Do probit/logit transform + !!!probit_ens(i) = probit_or_logit_transform(quantile) + !!!end do + !!!end do + !!! +!!!else + !!!! Not using a pre-existing distribution + !!!! Take care of space for the transform data structure, just need to know sorted prior members + !!!if(allocated(p%params)) deallocate(p%params) + !!!allocate(p%params(ens_size)) +!!! + !!!! For particle filter, the required data for inversion is the original ensemble values + !!!! Having them in sorted order is useful for subsequent inversion + !!!call index_sort(state_ens, ens_index, ens_size) + !!!p%params(1:ens_size) = state_ens(ens_index) +!!! + !!!! Get the quantiles for each of the ensemble members + !!!do i = 1, ens_size + !!!indx = ens_index(i) + !!!! The quantiles for a particle filter are just 2(i-1) / 2n + !!!quantile = 2*(indx - 1) / (2 * ens_size) +!!! + !!!! Transform the quantiles to probit/logit space + !!!probit_ens(indx) = probit_or_logit_transform(quantile) + !!!end do +!!! +!!!endif +!!! +!!!end subroutine to_probit_particle +!!! +!------------------------------------------------------------------------ + +subroutine transform_all_from_probit(ens_size, num_vars, probit_ens, p, state_ens) + +integer, intent(in) :: ens_size +integer, intent(in) :: num_vars +real(r8), intent(in) :: probit_ens(:, :) +type(distribution_params_type), intent(inout) :: p(num_vars) +real(r8), intent(out) :: state_ens(:, :) + +! Transform back to the original space +integer :: i +real(r8) :: temp_ens(ens_size) + +do i = 1, num_vars + call transform_from_probit(ens_size, probit_ens(1:ens_size, i), p(i), temp_ens) + state_ens(1:ens_size, i) = temp_ens +end do + +end subroutine transform_all_from_probit + +!------------------------------------------------------------------------ + +subroutine transform_from_probit(ens_size, probit_ens, p, state_ens) + +integer, intent(in) :: ens_size +real(r8), intent(in) :: probit_ens(ens_size) +type(distribution_params_type), intent(inout) :: p +real(r8), intent(out) :: state_ens(ens_size) + +! If not initialized, read in the namelist +if(.not. module_initialized) call initialize_probit_transform + +! Transform back to the original space +if(p%distribution_type == NORMAL_DISTRIBUTION) then + ! No need to do any transformation for a normal + state_ens = probit_ens +elseif(p%distribution_type == LOG_NORMAL_DISTRIBUTION) then + call from_probit_log_normal(ens_size, probit_ens, state_ens) +elseif(p%distribution_type == UNIFORM_DISTRIBUTION) then + call from_probit_uniform(ens_size, probit_ens, p, state_ens) +elseif(p%distribution_type == GAMMA_DISTRIBUTION) then + call from_probit_gamma(ens_size, probit_ens, p, state_ens) +elseif(p%distribution_type == BETA_DISTRIBUTION) then + call from_probit_beta(ens_size, probit_ens, p, state_ens) +elseif(p%distribution_type == BOUNDED_NORMAL_RH_DISTRIBUTION) then + call from_probit_bounded_normal_rh(ens_size, probit_ens, p, state_ens) +!!!elseif(p%distribution_type == PARTICLE_FILTER_DISTRIBUTION) then + !!!call from_probit_particle(ens_size, probit_ens, p, state_ens) +else + write(errstring, *) 'Illegal distribution type', p%distribution_type + call error_handler(E_ERR, 'transform_from_probit', errstring, source) + stop +endif + +! Deallocate any allocatable storage that was used for this distribution +call deallocate_distribution_params(p) + +end subroutine transform_from_probit + +!------------------------------------------------------------------------ + +subroutine from_probit_log_normal(ens_size, probit_ens, state_ens) + +integer, intent(in) :: ens_size +real(r8), intent(in) :: probit_ens(ens_size) +real(r8), intent(out) :: state_ens(ens_size) + +! Take the inverse of the log to get back to original space +state_ens = exp(probit_ens) + +end subroutine from_probit_log_normal + +!------------------------------------------------------------------------ + +subroutine from_probit_uniform(ens_size, probit_ens, p, state_ens) + +integer, intent(in) :: ens_size +real(r8), intent(in) :: probit_ens(ens_size) +type(distribution_params_type), intent(inout) :: p +real(r8), intent(out) :: state_ens(ens_size) + +real(r8) :: quantile +integer :: i + +do i = 1, ens_size + ! First, invert the probit to get a quantile + quantile = inv_probit_or_logit_transform(probit_ens(i)) + ! Transform from U(0, 1) to U(lower_bound, upper_bound) + state_ens(i) = p%lower_bound + quantile * (p%upper_bound - p%lower_bound) +end do + +end subroutine from_probit_uniform + +!------------------------------------------------------------------------ + +subroutine from_probit_gamma(ens_size, probit_ens, p, state_ens) + +integer, intent(in) :: ens_size +real(r8), intent(in) :: probit_ens(ens_size) +type(distribution_params_type), intent(inout) :: p +real(r8), intent(out) :: state_ens(ens_size) + +! Transform back to the original space +real(r8) :: quantile +integer :: i + +do i = 1, ens_size + ! First, invert the probit/logit to get a quantile + quantile = inv_probit_or_logit_transform(probit_ens(i)) + ! Invert the gamma quantiles to get physical space + state_ens(i) = inv_gamma_cdf_params(quantile, p) +end do + +end subroutine from_probit_gamma + +!------------------------------------------------------------------------ + +subroutine from_probit_beta(ens_size, probit_ens, p, state_ens) + +integer, intent(in) :: ens_size +real(r8), intent(in) :: probit_ens(ens_size) +type(distribution_params_type), intent(inout) :: p +real(r8), intent(out) :: state_ens(ens_size) + +! Transform back to the original space +real(r8) :: quantile +integer :: i + +do i = 1, ens_size + ! First, invert the probit/logit to get a quantile + quantile = inv_probit_or_logit_transform(probit_ens(i)) + ! Invert the beta quantiles to get scaled physical space + state_ens(i) = inv_beta_cdf_params(quantile, p) +end do + +end subroutine from_probit_beta + +!------------------------------------------------------------------------ + +subroutine from_probit_bounded_normal_rh(ens_size, probit_ens, p, state_ens) + +integer, intent(in) :: ens_size +real(r8), intent(in) :: probit_ens(ens_size) +type(distribution_params_type), intent(inout) :: p +real(r8), intent(out) :: state_ens(ens_size) + +integer :: i +real(r8) :: quantiles(ens_size) + +! Do not know what to do if original ensemble had all members the same (or nearly so???) +if(get_bnrh_sd(p) <= 0.0_r8) then + state_ens = probit_ens +else + + ! Transform each probit ensemble member back to physical space + do i = 1, ens_size + ! First, invert the probit/logit to get quantiles + quantiles(i) = inv_probit_or_logit_transform(probit_ens(i)) + end do + + ! Invert the rank histogram CDF to get the physical space ensemble + call inv_bnrh_cdf_params(quantiles, ens_size, p, state_ens) +endif + +end subroutine from_probit_bounded_normal_rh + +!------------------------------------------------------------------------ + +!!!subroutine from_probit_particle(ens_size, probit_ens, p, state_ens) +!!! +!!!integer, intent(in) :: ens_size +!!!real(r8), intent(in) :: probit_ens(ens_size) +!!!type(distribution_params_type), intent(inout) :: p +!!!real(r8), intent(out) :: state_ens(ens_size) +!!! +!!!integer :: i, indx +!!!real(r8) :: quantile +!!! +!!!do i = 1, ens_size + !!!! First invert the probit/logit transform to tg + !!!quantile = inv_probit_or_logit_transform(probit_ens(i)) +!!! + !!!! Invert the quantile for a particle prior + !!!! There is a prior ensemble member associated with each 1/ens_size fraction of the quantile + !!!! range + !!!indx = floor(quantile * ens_size) + 1 + !!!if(indx <= 0) indx = 1 + !!!state_ens(i) = p%more_params(indx) +!!!end do +!!! +!!!! Probably do this explicitly +!!!! Free the storage +!!!deallocate(p%more_params) +!!! +!!!end subroutine from_probit_particle + +!------------------------------------------------------------------------ + +function probit_or_logit_transform(quantile) + +real(r8) :: probit_or_logit_transform +real(r8), intent(in) :: quantile + +! Transform the quantile +if(use_logit_instead_of_probit) then + probit_or_logit_transform = log(quantile / (1.0_r8 - quantile)) +else + probit_or_logit_transform = inv_std_normal_cdf(quantile) +endif + +end function probit_or_logit_transform + +!------------------------------------------------------------------------ + +function inv_probit_or_logit_transform(p) + +real(r8) :: inv_probit_or_logit_transform +real(r8), intent(in) :: p + +! Transform back to get a quantile +if(use_logit_instead_of_probit) then + inv_probit_or_logit_transform = 1.0_r8 / (1.0_r8 + exp(-p)) +else + inv_probit_or_logit_transform = normal_cdf(p, 0.0_r8, 1.0_r8) +endif + +end function inv_probit_or_logit_transform + +!------------------------------------------------------------------------ +subroutine initialize_probit_transform() + +integer :: iunit, io + +module_initialized = .true. + +! Read the namelist entry +call find_namelist_in_file("input.nml", "probit_transform_nml", iunit) +read(iunit, nml = probit_transform_nml, iostat = io) +call check_namelist_read(iunit, io, "probit_transform_nml") + +if (do_nml_file()) write(nmlfileunit,nml=probit_transform_nml) +if (do_nml_term()) write( * ,nml=probit_transform_nml) + +end subroutine initialize_probit_transform + +!------------------------------------------------------------------------ +function fix_bounds(x, bounded_below, bounded_above, lower_bound, upper_bound) + +real(r8) :: fix_bounds +real(r8), intent(in) :: x +logical, intent(in) :: bounded_below, bounded_above +real(r8), intent(in) :: lower_bound, upper_bound + +! A variety of round off errors can lead to small violations of the bounds for state and +! observation quantities. This function corrects the violations if they are small. If +! they are bigger than the egregious bound set here, then execution is terminated. + +real(r8), parameter :: egregious_bound_threshold = 1.0e-12_r8 + +! Default behavior is to leave x unchanged +fix_bounds = x + +! Fail here on egregious violations; this could be removed +if(bounded_below) then + if(lower_bound - x > egregious_bound_threshold) then + write(errstring, *) 'Egregious lower bound violation (see code)', x, lower_bound + call error_handler(E_ERR, 'fix_bounds', errstring, source) + else + fix_bounds = max(x, lower_bound) + endif +endif + +if(bounded_above) then + if(x - upper_bound > egregious_bound_threshold) then + write(errstring, *) 'Egregious upper bound violoation first check(see code)', x, upper_bound + call error_handler(E_ERR, 'fix_bounds', errstring, source) + else + fix_bounds = min(x, upper_bound) + endif +endif + +end function fix_bounds + +!------------------------------------------------------------------------ + +end module probit_transform_mod diff --git a/assimilation_code/modules/assimilation/probit_transform_mod.nml b/assimilation_code/modules/assimilation/probit_transform_mod.nml new file mode 100644 index 0000000000..4060c54c2b --- /dev/null +++ b/assimilation_code/modules/assimilation/probit_transform_mod.nml @@ -0,0 +1,6 @@ +&probit_transform_nml + fix_bound_violations = .false., + use_logit_instead_of_probit = .false., + do_inverse_check = .false. + / + diff --git a/assimilation_code/programs/perfect_model_obs/perfect_model_obs.f90 b/assimilation_code/programs/perfect_model_obs/perfect_model_obs.f90 index d419b43869..6417785e2c 100644 --- a/assimilation_code/programs/perfect_model_obs/perfect_model_obs.f90 +++ b/assimilation_code/programs/perfect_model_obs/perfect_model_obs.f90 @@ -23,8 +23,10 @@ program perfect_model_obs delete_seq_tail, destroy_obs, destroy_obs_sequence -use obs_def_mod, only : obs_def_type, get_obs_def_error_variance, get_obs_def_time +use obs_def_mod, only : obs_def_type, get_obs_def_error_variance, get_obs_def_time, & + get_obs_def_type_of_obs use obs_model_mod, only : move_ahead, advance_state, set_obs_model_trace +use obs_kind_mod, only : get_quantity_for_type_of_obs use assim_model_mod, only : static_init_assim_model, get_model_size, & get_initial_condition @@ -61,6 +63,8 @@ program perfect_model_obs use mpi_utilities_mod, only : my_task_id +use algorithm_info_mod, only : init_algorithm_info_mod, obs_error_info, end_algorithm_info_mod + implicit none character(len=*), parameter :: source = 'perfect_model_obs.f90' @@ -107,7 +111,8 @@ program perfect_model_obs obs_seq_out_file_name = 'obs_seq.out', & adv_ens_command = './advance_model.csh' -namelist /perfect_model_obs_nml/ read_input_state_from_file, write_output_state_to_file, & +namelist /perfect_model_obs_nml/ read_input_state_from_file,& + write_output_state_to_file, & init_time_days, init_time_seconds, async, & first_obs_days, first_obs_seconds, & last_obs_days, last_obs_seconds, output_interval, & @@ -172,6 +177,11 @@ subroutine perfect_main() character(len=256), allocatable :: input_filelist(:), output_filelist(:), true_state_filelist(:) integer :: nfilesin, nfilesout +! Storage for bounded error +logical :: bounded_below, bounded_above +real(r8) :: lower_bound, upper_bound +real(r8) :: error_variance + ! Initialize all modules used that require it call perfect_initialize_modules_used() @@ -539,8 +549,38 @@ subroutine perfect_main() ! If observation is not being evaluated or assimilated, skip it ! Ends up setting a 1000 qc field so observation is not used again. if( qc_ens_handle%vars(i, 1) == 0 ) then - obs_value(1) = random_gaussian(random_seq, true_obs(1), & - sqrt(get_obs_def_error_variance(obs_def))) + + ! Get the information for generating error sample for this observation + call obs_error_info(obs_def, error_variance, & + bounded_below, bounded_above, lower_bound, upper_bound) + + ! Capability to do a bounded normal error + if(bounded_below .and. bounded_above) then + ! Bounds on both sides + obs_value(1) = lower_bound - 1.0_r8 + do while(obs_value(1) < lower_bound .or. obs_value(1) > upper_bound) + obs_value(1) = random_gaussian(random_seq, true_obs(1), & + sqrt(error_variance)) + end do + elseif(bounded_below .and. .not. bounded_above) then + ! Bound on lower side + obs_value(1) = lower_bound - 1.0_r8 + do while(obs_value(1) < lower_bound) + obs_value(1) = random_gaussian(random_seq, true_obs(1), & + sqrt(error_variance)) + end do + elseif(.not. bounded_below .and. bounded_above) then + ! Bound on upper side + obs_value(1) = upper_bound + 1.0_r8 + do while(obs_value(1) > upper_bound) + obs_value(1) = random_gaussian(random_seq, true_obs(1), & + sqrt(error_variance)) + end do + else + ! No bounds, regular old normal distribution + obs_value(1) = random_gaussian(random_seq, true_obs(1), & + sqrt(error_variance)) + endif ! FIX ME SPINT: if the foward operater passed can we directly set the ! qc status? @@ -618,6 +658,8 @@ subroutine perfect_main() call destroy_obs_sequence(seq) call trace_message('After ensemble and obs memory cleanup') +call end_algorithm_info_mod() + call trace_message('Perfect_model done') call timestamp_message('Perfect_model done') @@ -645,6 +687,7 @@ subroutine perfect_initialize_modules_used() call state_vector_io_init() call initialize_qc() +call init_algorithm_info_mod() end subroutine perfect_initialize_modules_used diff --git a/conf.py b/conf.py index dcee0201d1..cf4bf8f786 100644 --- a/conf.py +++ b/conf.py @@ -21,7 +21,7 @@ author = 'Data Assimilation Research Section' # The full version, including alpha/beta/rc tags -release = '10.10.1' +release = '11.0.0' root_doc = 'index' # -- General configuration --------------------------------------------------- diff --git a/developer_tests/qceff/test_table_read.f90 b/developer_tests/qceff/test_table_read.f90 new file mode 100644 index 0000000000..5958684a10 --- /dev/null +++ b/developer_tests/qceff/test_table_read.f90 @@ -0,0 +1,19 @@ +! DART software - Copyright UCAR. This open source software is provided +! by UCAR, "as is", without charge, subject to all terms of use at +! http://www.image.ucar.edu/DAReS/DART/DART_download + +program test_table_read + +use algorithm_info_mod, only : init_algorithm_info_mod, end_algorithm_info_mod +use utilities_mod, only : initialize_utilities, finalize_utilities + +implicit none + +call initialize_utilities('test_table_read') + +call init_algorithm_info_mod() +call end_algorithm_info_mod() + +call finalize_utilities() + +end program test_table_read diff --git a/developer_tests/qceff/work/all_bnrhf_qceff_table.csv b/developer_tests/qceff/work/all_bnrhf_qceff_table.csv new file mode 100644 index 0000000000..3e50dfea1c --- /dev/null +++ b/developer_tests/qceff/work/all_bnrhf_qceff_table.csv @@ -0,0 +1,5 @@ +QCF table version: 1,obs_error_info,,,,probit_inflation,,,,,probit_state,,,,,probit_extended_state,,,,,obs_inc_info,,,, +QTY_TEMPLATE:,bounded_below,bounded_above,lower_bound,upper_bound,dist_type,bounded_below,bounded_above,lower_bound,upper_bound,dist_type,bounded_below,bounded_above,lower_bound,upper_bound,dist_type,bounded_below,bounded_above,lower_bound,upper_bound,filter_kind,bounded_below,bounded_above,lower_bound,upper_bound +QTY_state_VARIABLE,.false.,.false.,-888888,-888888,BOUNDED_NORMAL_RH_DISTRIBUTION,.false.,.false.,-888888,-888888,BOUNDED_NORMAL_RH_DISTRIBUTION,.false.,.false.,-888888,-888888,BOUNDED_NORMAL_RH_DISTRIBUTION,.false.,.false.,-888888,-888888,BOUNDED_NORMAL_RHF,.false.,.false.,-888888,-888888 +QTY_TRACER_CONCENTRATION,.true.,.false.,0,-888888,BOUNDED_NORMAL_RH_DISTRIBUTION,.true.,.false.,0,-888888,BOUNDED_NORMAL_RH_DISTRIBUTION,.true.,.false.,0,-888888,BOUNDED_NORMAL_RH_DISTRIBUTION,.true.,.false.,0,-888888,BOUNDED_NORMAL_RHF,.true.,.false.,0,-888888 +QTY_TRACER_SOURCE,.true.,.false.,0,-888888,BOUNDED_NORMAL_RH_DISTRIBUTION,.true.,.false.,0,-888888,BOUNDED_NORMAL_RH_DISTRIBUTION,.true.,.false.,0,-888888,BOUNDED_NORMAL_RH_DISTRIBUTION,.true.,.false.,0,-888888,BOUNDED_NORMAL_RHF,.true.,.false.,0,-888888 diff --git a/developer_tests/qceff/work/input.nml b/developer_tests/qceff/work/input.nml new file mode 100644 index 0000000000..ffa7155436 --- /dev/null +++ b/developer_tests/qceff/work/input.nml @@ -0,0 +1,28 @@ +&utilities_nml + TERMLEVEL = 1, + module_details = .false. + logfilename = 'dart_log.out' + / + +# pick a random set of inputs +&preprocess_nml + overwrite_output = .true. + input_obs_qty_mod_file = '../../../assimilation_code/modules/observations/DEFAULT_obs_kind_mod.F90' + output_obs_qty_mod_file = '../../../assimilation_code/modules/observations/obs_kind_mod.f90' + input_obs_def_mod_file = '../../../observations/forward_operators/DEFAULT_obs_def_mod.F90' + output_obs_def_mod_file = '../../../observations/forward_operators/obs_def_mod.f90' + obs_type_files = '../../../observations/forward_operators/obs_def_reanalysis_bufr_mod.f90', + '../../../observations/forward_operators/obs_def_radar_mod.f90', + '../../../observations/forward_operators/obs_def_metar_mod.f90', + '../../../observations/forward_operators/obs_def_dew_point_mod.f90', + '../../../observations/forward_operators/obs_def_rel_humidity_mod.f90', + '../../../observations/forward_operators/obs_def_altimeter_mod.f90', + '../../../observations/forward_operators/obs_def_gps_mod.f90', + '../../../observations/forward_operators/obs_def_vortex_mod.f90', + '../../../observations/forward_operators/obs_def_gts_mod.f90', + '../../../observations/forward_operators/obs_def_QuikSCAT_mod.f90' + quantity_files = '../../../assimilation_code/modules/observations/default_quantities_mod.f90', + / + +&obs_kind_nml +/ diff --git a/developer_tests/qceff/work/qcf_table.txt b/developer_tests/qceff/work/qcf_table.txt new file mode 100644 index 0000000000..0449b56d87 --- /dev/null +++ b/developer_tests/qceff/work/qcf_table.txt @@ -0,0 +1,3 @@ +QCF table version: 1 +QTY_TEMPLATE: bounded_below bounded_above lower_bound upper_bound dist_type bounded_below bounded_above lower_bound upper_bound dist_type bounded_below bounded_above lower_bound upper_bound dist_type bounded_below bounded_above lower_bound upper_bound filter_kind bounded_below bounded_above lower_bound upper_bound +QTY_STATE_VARIABLE .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RHF .false. .false. -888888.0 -888888.0 diff --git a/developer_tests/qceff/work/qcf_table_bad_qty.txt b/developer_tests/qceff/work/qcf_table_bad_qty.txt new file mode 100644 index 0000000000..3933fc478b --- /dev/null +++ b/developer_tests/qceff/work/qcf_table_bad_qty.txt @@ -0,0 +1,3 @@ +QCF table version: 1 +QTY_TEMPLATE: bounded_below bounded_above lower_bound upper_bound dist_type bounded_below bounded_above lower_bound upper_bound dist_type bounded_below bounded_above lower_bound upper_bound dist_type bounded_below bounded_above lower_bound upper_bound filter_kind bounded_below bounded_above lower_bound upper_bound +QTY_HAIRCUT .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RHF .false. .false. -888888.0 -888888.0 diff --git a/developer_tests/qceff/work/qcf_table_broke.txt b/developer_tests/qceff/work/qcf_table_broke.txt new file mode 100644 index 0000000000..6585f67485 --- /dev/null +++ b/developer_tests/qceff/work/qcf_table_broke.txt @@ -0,0 +1,3 @@ +QCF table version: 1 +QTY_TEMPLATE: bounded_below bounded_above lower_bound upper_bound dist_type bounded_below bounded_above lower_bound upper_bound dist_type bounded_below bounded_above lower_bound upper_bound dist_type bounded_below bounded_above lower_bound upper_bound filter_kind bounded_below bounded_above lower_bound upper_bound +QTY_STATE_VARIABLE .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RHF .false. .false. -888888.0 -888888.0 diff --git a/developer_tests/qceff/work/qcf_table_duplicates.txt b/developer_tests/qceff/work/qcf_table_duplicates.txt new file mode 100644 index 0000000000..7c4dfd9bd9 --- /dev/null +++ b/developer_tests/qceff/work/qcf_table_duplicates.txt @@ -0,0 +1,6 @@ +QCF table version: 1 +QTY_TEMPLATE: bounded_below bounded_above lower_bound upper_bound dist_type bounded_below bounded_above lower_bound upper_bound dist_type bounded_below bounded_above lower_bound upper_bound dist_type bounded_below bounded_above lower_bound upper_bound filter_kind bounded_below bounded_above lower_bound upper_bound +QTY_VEGETATED_AREA_FRACTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RHF .false. .false. -888888.0 -888888.0 +QTY_AQUIFER_WATER .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RHF .false. .false. -888888.0 -888888.0 +QTY_SEAICE_SALINITY008 .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RHF .false. .false. -888888.0 -888888.0 +QTY_VEGETATED_AREA_FRACTION .true. .false. 0 1000 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RHF .false. .false. -888888.0 -888888.0 diff --git a/developer_tests/qceff/work/qcf_table_extra_columns.txt b/developer_tests/qceff/work/qcf_table_extra_columns.txt new file mode 100644 index 0000000000..3f1236b2b6 --- /dev/null +++ b/developer_tests/qceff/work/qcf_table_extra_columns.txt @@ -0,0 +1,3 @@ +QCF table version: 1 +QTY_TEMPLATE: bounded_below bounded_above lower_bound upper_bound dist_type bounded_below bounded_above lower_bound upper_bound dist_type bounded_below bounded_above lower_bound upper_bound dist_type bounded_below bounded_above lower_bound upper_bound filter_kind bounded_below bounded_above lower_bound upper_bound frog +QTY_STATE_VARIABLE .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RHF .false. .false. -888888.0 -888888.0 toad newt diff --git a/developer_tests/qceff/work/qcf_table_incorrect_distribution.txt b/developer_tests/qceff/work/qcf_table_incorrect_distribution.txt new file mode 100644 index 0000000000..93d10d7869 --- /dev/null +++ b/developer_tests/qceff/work/qcf_table_incorrect_distribution.txt @@ -0,0 +1,3 @@ +QCF table version: 1 +QTY_TEMPLATE: bounded_below bounded_above lower_bound upper_bound dist_type bounded_below bounded_above lower_bound upper_bound dist_type bounded_below bounded_above lower_bound upper_bound dist_type bounded_below bounded_above lower_bound upper_bound filter_kind bounded_below bounded_above lower_bound upper_bound +QTY_STATE_VARIABLE .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 POLAR_BEAR_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RHF .false. .false. -888888.0 -888888.0 diff --git a/developer_tests/qceff/work/qcf_table_incorrect_filter_kind.txt b/developer_tests/qceff/work/qcf_table_incorrect_filter_kind.txt new file mode 100644 index 0000000000..38d567f833 --- /dev/null +++ b/developer_tests/qceff/work/qcf_table_incorrect_filter_kind.txt @@ -0,0 +1,3 @@ +QCF table version: 1 +QTY_TEMPLATE: bounded_below bounded_above lower_bound upper_bound dist_type bounded_below bounded_above lower_bound upper_bound dist_type bounded_below bounded_above lower_bound upper_bound dist_type bounded_below bounded_above lower_bound upper_bound filter_kind bounded_below bounded_above lower_bound upper_bound +QTY_STATE_VARIABLE .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 PENGUIN_FILTER .false. .false. -888888.0 -888888.0 diff --git a/developer_tests/qceff/work/qcf_table_lower_bound_only.txt b/developer_tests/qceff/work/qcf_table_lower_bound_only.txt new file mode 100644 index 0000000000..3916443577 --- /dev/null +++ b/developer_tests/qceff/work/qcf_table_lower_bound_only.txt @@ -0,0 +1,6 @@ +QCF table version: 1 +QTY_TEMPLATE: bounded_below bounded_above lower_bound upper_bound dist_type bounded_below bounded_above lower_bound upper_bound dist_type bounded_below bounded_above lower_bound upper_bound dist_type bounded_below bounded_above lower_bound upper_bound filter_kind bounded_below bounded_above lower_bound upper_bound +QTY_STATE_VARIABLE .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RHF .false. .false. -888888.0 -888888.0 +QTY_AQUIFER_WATER .true. .false. 0.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RHF .false. .false. -888888.0 -888888.0 +QTY_SEAICE_SALINITY008 .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RHF .false. .false. -888888.0 -888888.0 +QTY_VEGETATED_AREA_FRACTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RHF .false. .false. -888888.0 -888888.0 diff --git a/developer_tests/qceff/work/qcf_table_lower_case_dist.txt b/developer_tests/qceff/work/qcf_table_lower_case_dist.txt new file mode 100644 index 0000000000..b6750b66ec --- /dev/null +++ b/developer_tests/qceff/work/qcf_table_lower_case_dist.txt @@ -0,0 +1,3 @@ +QCF table version: 1 +QTY_TEMPLATE: bounded_below bounded_above lower_bound upper_bound dist_type bounded_below bounded_above lower_bound upper_bound dist_type bounded_below bounded_above lower_bound upper_bound dist_type bounded_below bounded_above lower_bound upper_bound filter_kind bounded_below bounded_above lower_bound upper_bound +QTY_STATE_VARIABLE .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_normal_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RHF .false. .false. -888888.0 -888888.0 diff --git a/developer_tests/qceff/work/qcf_table_lower_gt_upper.txt b/developer_tests/qceff/work/qcf_table_lower_gt_upper.txt new file mode 100644 index 0000000000..4afb33e579 --- /dev/null +++ b/developer_tests/qceff/work/qcf_table_lower_gt_upper.txt @@ -0,0 +1,6 @@ +QCF table version: 1 +QTY_TEMPLATE: bounded_below bounded_above lower_bound upper_bound dist_type bounded_below bounded_above lower_bound upper_bound dist_type bounded_below bounded_above lower_bound upper_bound dist_type bounded_below bounded_above lower_bound upper_bound filter_kind bounded_below bounded_above lower_bound upper_bound +QTY_STATE_VARIABLE .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RHF .false. .false. -888888.0 -888888.0 +QTY_AQUIFER_WATER .true. .true. 10.0 0.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RHF .false. .false. -888888.0 -888888.0 +QTY_SEAICE_SALINITY008 .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RHF .false. .false. -888888.0 -888888.0 +QTY_VEGETATED_AREA_FRACTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RHF .false. .false. -888888.0 -888888.0 diff --git a/developer_tests/qceff/work/qcf_table_no_bounds_with_values.txt b/developer_tests/qceff/work/qcf_table_no_bounds_with_values.txt new file mode 100644 index 0000000000..c987d2f9e9 --- /dev/null +++ b/developer_tests/qceff/work/qcf_table_no_bounds_with_values.txt @@ -0,0 +1,6 @@ +QCF table version: 1 +QTY_TEMPLATE: bounded_below bounded_above lower_bound upper_bound dist_type bounded_below bounded_above lower_bound upper_bound dist_type bounded_below bounded_above lower_bound upper_bound dist_type bounded_below bounded_above lower_bound upper_bound filter_kind spread_restoration bounded_below bounded_above lower_bound upper_bound +QTY_STATE_VARIABLE .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RHF .false. .false. -888888.0 -888888.0 +QTY_AQUIFER_WATER .false. .false. 10.0 0.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RHF .false. .false. -888888.0 -888888.0 +QTY_SEAICE_SALINITY008 .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RHF .false. .false. -888888.0 -888888.0 +QTY_VEGETATED_AREA_FRACTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RHF .false. .false. -888888.0 -888888.0 diff --git a/developer_tests/qceff/work/qcf_table_no_header.txt b/developer_tests/qceff/work/qcf_table_no_header.txt new file mode 100644 index 0000000000..617379ffe0 --- /dev/null +++ b/developer_tests/qceff/work/qcf_table_no_header.txt @@ -0,0 +1,4 @@ +QTY_STATE_VARIABLE .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RHF .false. .false. .false. .false. .false. .false. -888888.0 -888888.0 +QTY_AQUIFER_WATER .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RHF .false. .false. .false. .false. .false. .false. -888888.0 -888888.0 +QTY_SEAICE_SALINITY008 .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RHF .false. .false. .false. .false. .false. .false. -888888.0 -888888.0 +QTY_VEGETATED_AREA_FRACTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RHF .false. .false. .false. .false. .false. .false. -888888.0 -888888.0 diff --git a/developer_tests/qceff/work/qcf_table_v2.txt b/developer_tests/qceff/work/qcf_table_v2.txt new file mode 100644 index 0000000000..0d1b5a46df --- /dev/null +++ b/developer_tests/qceff/work/qcf_table_v2.txt @@ -0,0 +1,3 @@ +QCF table version: 2 +QTY_TEMPLATE: bounded_below bounded_above lower_bound upper_bound dist_type bounded_below bounded_above lower_bound upper_bound dist_type bounded_below bounded_above lower_bound upper_bound dist_type bounded_below bounded_above lower_bound upper_bound filter_kind rectangular_quadrature gaussian_likelihood_tails sort_obs_inc spread_restoration bounded_below bounded_above lower_bound upper_bound +QTY_STATE_VARIABLE .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RH_DISTRIBUTION .false. .false. -888888.0 -888888.0 BOUNDED_NORMAL_RHF .false. .false. .false. .false. .false. .false. -888888.0 -888888.0 diff --git a/developer_tests/qceff/work/quickbuild.sh b/developer_tests/qceff/work/quickbuild.sh new file mode 100755 index 0000000000..81b1308494 --- /dev/null +++ b/developer_tests/qceff/work/quickbuild.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash + +# DART software - Copyright UCAR. This open source software is provided +# by UCAR, "as is", without charge, subject to all terms of use at +# http://www.image.ucar.edu/DAReS/DART/DART_download + +main() { + + +export DART=$(git rev-parse --show-toplevel) +source "$DART"/build_templates/buildfunctions.sh + +MODEL="none" +EXTRA="$DART"/models/template/threed_model_mod.f90 +dev_test=1 +TEST="qceff" +LOCATION="threed_sphere" + +serial_programs=( +test_table_read +) + +# quickbuild arguments +arguments "$@" + +# clean the directory +\rm -f -- *.o *.mod Makefile .cppdefs + +# build and run preprocess before making any other DART executables +buildpreprocess + +# build +buildit + +# clean up +\rm -f -- *.o *.mod + +} + +main "$@" diff --git a/developer_tests/qceff/work/runall.sh b/developer_tests/qceff/work/runall.sh new file mode 100755 index 0000000000..72597f775b --- /dev/null +++ b/developer_tests/qceff/work/runall.sh @@ -0,0 +1,71 @@ +#!/bin/bash + +# DART software - Copyright UCAR. This open source software is provided +# by UCAR, "as is", without charge, subject to all terms of use at +# http://www.image.ucar.edu/DAReS/DART/DART_download + +# Usage: +# ./runall.sh +# ./runall.sh | grep FAIL +# ./runall.sh | grep PASS + +function set_table () { +echo "&algorithm_info_nml +qceff_table_filename = '$1' +/ +$(cat input.nml)" > input.nml +} + +function run_test () { +cp input.nml input.nml.orig +set_table $1 +./test_table_read $1 +res=$? +cp input.nml.orig input.nml +return $res +} + +should_pass () { +if [[ $? -ne 0 ]]; then + echo $1: "FAIL" +else + echo $1: "PASS" +fi +} + +should_fail () { +if [[ $? -eq 0 ]]; then + echo $1: "FAIL" +else + echo $1: "PASS" +fi +} + +run_test ; should_pass "no table" + +run_test qcf_table.txt ; should_pass "correct v1 table" + +run_test qcf_table_v2.txt ; should_fail "detect wrong version" + +run_test qcf_table_extra_columns.txt ; should_pass "extra colums" + +run_test qcf_table_bad_qty.txt ; should_fail "bad qty" + +run_test qcf_table_broke.txt ; should_fail "bad value" + +run_test qcf_table_no_header.txt ; should_fail "no header" + +run_test qcf_table_lower_gt_upper.txt ; should_fail "upper bound less than lower" + +run_test qcf_table_lower_bound_only.txt ; should_pass "lower bound only" + +run_test qcf_table_no_bounds_with_values.txt ; should_pass "bounds false, values for bounds" + +run_test qcf_table_incorrect_filter_kind.txt ; should_fail "incorrect filter_kind" + +run_test qcf_table_incorrect_distribution.txt ; should_fail "incorrect distribution" + +run_test all_bnrhf_qceff_table.csv ; should_pass "lower case QTY" + +run_test qcf_table_lower_case_dist.txt; should_pass "lower case dist_type" + diff --git a/guide/_static/papers/QCEFF_3_submitted.pdf b/guide/_static/papers/QCEFF_3_submitted.pdf new file mode 100644 index 0000000000..44b56ba1ee Binary files /dev/null and b/guide/_static/papers/QCEFF_3_submitted.pdf differ diff --git a/guide/qceff-examples.rst b/guide/qceff-examples.rst new file mode 100644 index 0000000000..ea01524dcf --- /dev/null +++ b/guide/qceff-examples.rst @@ -0,0 +1,197 @@ +.. _quantile tracer: + +QCEFF: Examples with the Lorenz 96 Tracer Model +=============================================== + + +The Quantile-Conserving Ensemble Filter Framework (QCEFF) tools are available in DART +as of version v11. +The DART development team (dart@ucar.edu) would be happy to hear about your experiences and is +anxious to build scientific collaborations using these new capabilities. + +To get started, make sure that you are on the quantile_methods branch of DART: + +.. code-block:: text + + git checkout quantile_methods + +Build the DART executables for the Lorenz 96 tracer advection model: + +.. code-block:: text + + cd DART/models/lorenz_96_tracer_advection/work + ./quickbuild.sh nompi + + +The new quantile options are set using a :ref:`qceff table ` given as a namelist +option ``qceff_table_filename`` to &algorithm_info_nml. The examples below show how to change the quantile options +using various QCEFF tables. You can find the .csv files for these four examples in the directory +``DART/models/lorenz_96_tracer_advection/work`` + + +.. list-table:: + :header-rows: 1 + :widths: 15 60 25 + + * - example + - description + - .csv filename + * - Example A + - bounded normal rank histogram + - all_bnrhf_qceff_table.csv + * - Example B + - Ensemble Adjustment Kalman filters + - all_eakf_qceff_table.csv + * - Example C + - EAKF for state and bounded normal rank histogram filter and priors for tracer concentration and source + - state_eakf_tracer_bnrhf_qceff_table.csv + * - Example D + - Negative tracers bounded above + - neg_qceff_table.csv + + +You can view .csv files with a text editor, or spreadsheet tool such as Google Sheets, +or Microsoft Excel. + +Example A +---------- + +Assimilating observations of state (wind) and tracer concentration using +a rank histogram observation space filter and rank histogram probit transforms for +state variable updates. This example includes adaptive inflation. + +The default model configuration has a single tracer source at gridpoint 1 along with +small uniform tracer sinks that lead to areas where the true tracer concentration is +usually 0. This is a particularly tough test for ensemble methods. + +#. Edit input.nml to set the qceff_table_filename to 'all_bnrhf_qceff_table.csv' + + .. code-block:: text + + &algorithm_info_nml + qceff_table_filename = 'all_bnrhf_qceff_table.csv' + + +#. Create a set_def.out file using create_obs_sequence, + + ``./create_obs_sequence < create_obs_sequence_input`` + +#. Create an obs_sequence.in file using create_fixed_network_seq + + ``./create_fixed_network_seq`` + + .. code:: text + + Select the default input filename , + Create a regularly repeating sequence by entering "1", + Enter "1000" for the number of observation times, + Enter "0 0" for the initial time, + Enter "0 10800" for the period, + Select the default output filename, + +#. Spin-up a model initial condition by running perfect_model_obs + + ``./perfect_model_obs`` + +#. Generate a spun-up true time series, + + ``cp perfect_output.nc perfect_input.nc`` + + + Edit input.nml to set read_input_state_from_file to .true. + + .. code:: text + + &perfect_model_obs_nml + read_input_state_from_file = .true., + + + Run ``./perfect_model_obs`` again. + +#. Run a filter assimilation, + + ``./filter`` + +#. Examine the output with your favorite tool(s) (e.g. plot_ens_time_series.m). Looking at the analysis ensemble + for the tracer_concentration variables with indices near the source (location 1) + and far downstream from the source (location 35) is interesting. + Near the source, the true concentration and the ensemble estimates are all non-zero while far from the source + there are times when the true concentration and many ensemble members are zero. For further detail + see Anderson et al. (2023). [1]_ + Note that the source estimation capabilities of the model and filters are not being tested here. + + +Example B +--------- + +Using Ensemble Adjustment Kalman filters. + + +#. Edit input.nml to set the qceff_table_filename to 'all_eakf_qceff_table.csv' + + .. code-block:: text + + &algorithm_info_nml + qceff_table_filename = 'all_eakf_qceff_table.csv' + + +#. Run the filter + + ``./filter`` + +Example C +--------- + +Using Ensemble Adjustment Kalman filter for state, but bounded normal rank histogram filter and priors for tracer concentration and source. + + +#. Edit input.nml to set the qceff_table_filename to state_eakf_tracer_bnrhf_qceff_table.csv + + .. code-block:: text + + &algorithm_info_nml + qceff_table_filename = 'state_eakf_tracer_bnrhf_qceff_table.csv' + + +#. Run the filter + + ``./filter`` + +Example D +---------- + +Testing the bounded above option. Normally tracers are bounded below, but there are other quantities that may be bounded +above. There are distinct numerical challenges in implementing the quantile algorithms +for quantities that are bounded above, so flipping the sign of the tracers is a good +test. + +#. Edit input.nml to set the qceff_table_filename to neg_qceff_table.csv + + .. code-block:: text + + &algorithm_info_nml + qceff_table_filename = 'neg_qceff_table.csv' + + +#. Edit input.nml, to change the entry positive_tracer to .false. and read_input_state_from_file back to .false. + + + .. code-block:: text + + &model_nml + positive_tracer = .false., + + &perfect_model_obs_nml + read_input_state_from_file = .false., + + +#. Repeat steps 3-6 from Test A. + +References +---------- + +.. [1] Anderson, J. L., Riedel, C., Wieringa, M., Ishraque, F., Smith, M., Kershaw, H. + 2023: A Quantile-Conserving + Ensemble Filter Framework. Part III: Data Assimilation for Mixed Distributions + with Application to a Low-Order Tracer Advection Model. *Monthly Weather Review* + `[Manuscript submitted for publication] <../_static/papers/QCEFF_3_submitted.pdf>`_ diff --git a/guide/qceff_probit.rst b/guide/qceff_probit.rst new file mode 100644 index 0000000000..dddd4fbf7f --- /dev/null +++ b/guide/qceff_probit.rst @@ -0,0 +1,155 @@ +.. _QCEFF: + +Quantile-Conserving Ensemble Filter Framework +============================================== + +The Quantile-Conserving Ensemble Filter Framework (QCEFF) tools are available in DART +as of version v11. +The DART development team (dart@ucar.edu) would be happy to hear about your experiences +and is anxious to build scientific collaborations using these new capabilities. + +The QCEFF options are set using a :ref:`qceff table ` given as a namelist option to &algorithm_info_nml. + + .. code-block:: text + + &algorithm_info_nml + qceff_table_filename = 'qceff_table.csv' + + +.. _QCEFF options: + +QCEFF options +-------------- + +QCEFF options are per quantity. For a given quantity, you specify the following +options as columns of the qceff_table: + +* Observation error information + + Provides information about boundedness constraints that control the likelihood + distribution associated with an observed variable when using perfect_model_obs + to generate noisy observations. + + * bounded_below (default .false.) + * bounded_above (default .false.) + * lower_bound + * upper_bound + + +* Probit distribution information + + Used in the computation of the probit transform. + The values needed are the bounds and the distribution type. + These can be different for all three cases (inflation, state, and extended_state priors) + + * distribution (one of :ref:`Distributions`) + * bounded_below (default .false.) + * bounded_above (default .false.) + * lower_bound (default -888888) + * upper_bound (default -888888) + + +* Observation increment information + + * filter_kind (one of :ref:`Filter kinds`) + * bounded_below (default .false.) + * bounded_above (default .false.) + * lower_bound (default -888888) + * upper_bound (default -888888) + + + +.. _qceff table: + +Creating a qceff table +----------------------- + +The table has two headers, row 1 and 2. +The first row is the version number. The second row describes the :ref:`QCEFF options` corresponding to each column of the table. +These two headers must be present in your qceff table. +The :ref:`qcf trunc table` below shows the table headers, +and an example quantity QTY_TRACER_CONCENTRATION for the first 5 columns of the table. +There is a complete table with all 25 columns in `Google Sheets `_. You can copy and edit this table as needed. + +To add a quantity, add a row to the table. +For any quantity not listed in the table, the :ref:`Default values` values will be used for all 25 options. +You only have to add rows for quantities that use non-default values for any of the input options. +Ensure that there are no empty rows in between the quantities listed in the spreadsheet. +Save your spreadsheet as a .csv file. + +To run filter or perfect_model_obs, put the .csv file in the directory where you are running. +Edit input.nml to set the algorithm_info_nml option qceff_table_filename, for example: + + + .. code-block:: text + + &algorithm_info_nml + qceff_table_filename = 'qceff_table.csv' + + +.. _qcf trunc table: + +.. list-table:: truncated table + :header-rows: 2 + + * - QCF table version: 1 + - + - + - + - + * - QTY + - bounded_below + - bounded_above + - lower_bound + - upper_bound + * - QTY_TRACER_CONCENTRATION + - .true. + - .false. + - 0 + - -888888 + + +.. _Filter kinds: + +Available filter kinds +----------------------- + + * EAKF (default) + * ENKF + * UNBOUNDED_RHF + * GAMMA_FILTER + * BOUNDED_NORMAL_RHF + +.. _Distributions: + +Available distributions +------------------------ + + * NORMAL_DISTRIBUTION (default) + * BOUNDED_NORMAL_RH_DISTRIBUTION + * GAMMA_DISTRIBUTION + * BETA_DISTRIBUTION + * LOG_NORMAL_DISTRIBUTION + * UNIFORM_DISTRIBUTION + + + +.. _Default values: + +Default values +--------------- + +If a quantity is not in the qceff table, the following default values +are used: + + * filter_kind (default EAKF) + * dist_type (default NORMAL_DISTRIBUTION) + * bounded_below (default .false.) + * bounded_above (default .false.) + * lower_bound (default -888888) + * upper_bound (default -888888) + +.. note:: + + -888888 is a missing value in DART. + diff --git a/index.rst b/index.rst index 389e223bea..417ee8bee3 100644 --- a/index.rst +++ b/index.rst @@ -44,6 +44,7 @@ DART includes: - Extensive documentation of its source code. - Interfaces to a variety of models and observation sets that can be used to introduce new users or graduate students to ensemble DA. +- Nonlinear and Non-Gaussian DA Capabilities DART is also designed to facilitate the combination of assimilation algorithms, models, and real or synthetic observations to allow increased @@ -55,6 +56,56 @@ These tools are intended for use by the full range of geosciencies community: beginners and experts; students and teachers; national centers and university research labs. +Nonlinear and Non-Gaussian Data Assimilation Capabilities in DART +----------------------------------------------------------------- + +One of the historical drawbacks of ensemble data assimilation techniques is the +assumption that the quantities being assimilated obey a normal distribution. +While this is often a safe assumption -- distributions of temperature and +pressure can be approximated using a normal distribution -- many quantities +such as precipitation, snow depth and tracer concentration, as well as many +model parameters aren't normally distributed. + +Applying traditional ensemble data assimilation techniques in situations where +assumptions of gaussianity are invalid can lead to poor forecast skill and +inconclusive results. + +To overcome these problems, DART now implements a novel data assimilation +technique that no longer requires quantities to be normally distributed. The +Quantile-Conserving Ensemble Filtering Framework :ref:`(QCEFF) ` +provides a general method of computing increments for the prior ensemble of an +observed quantity by allowing the use of arbitrary distributions for the prior +and the observation error. For a detailed description of the QCEFF, see +Anderson (2022). [2]_ + +While the QCEFF for computing observation increments can lead to significant improvements in +analysis estimates for observed variables, those improvements can be lost when using standard +linear regression of observation increments to update state variables. The QCEFF also +implements a capability to do regression in a probit probability integral transformed space. +Doing the regression of observation quantile increments in the transformed space guarantees +that the posterior ensembles for state variables also retain the advantages of the observation space +quantile conserving posteriors. For example, if state variables are bounded, then posterior +ensembles will respect the bounds. The posterior ensembles also respect other aspects of the +continuous prior distributions. For a detailed description of this process, see +Anderson (2023) [3]_ and Anderson et al. (2023). [4]_ + +Inflation and localization, methods that improve the quality of ensemble DA, can also negate +the advantages of the QCEFF methods. For this reason, both localization and inflation can be +done in the probit-transformed quantile space as well. Combining these new methods can +significantly improve data assimilation for non-Gaussian quantities in Earth system models by +extending the capabilities of ensemble DA to general non-Gaussian and nonlinear distributions. +Transformative improvements in DA can result for many applications. The largest improvements are +found for bounded variables like tracer concentrations, snow and ice depth, soil moisture, and +similar quantities in other parts of the Earth system. Model parameters can also be estimated +with DA and large improvements can occur for bounded parameters. Variables that have distinctly +non-Gaussian prior distributions can also see large improvements. Examples can include atmospheric +quantities like moisture and cloud amount in the presence of convection, and many land surface variables. + +For instructions on how to use these tools, see :ref:`QCEFF`. + +For step-by-step examples of the QCEFF tools, you can work through +:ref:`examples with the Lorenz 96 tracer model ` + Organization of the documentation --------------------------------- @@ -208,6 +259,18 @@ References Facility. *Bulletin of the American Meteorological Society*, **90**, 1283-1296, `doi:10.1175/2009BAMS2618.1 `_ +.. [2] Anderson, J. L., 2022: A Quantile-Conserving Ensemble Filter Framework. + Part I: Updating an Observed Variable. *Monthly Weather Review*, **150**, + 1061–1074, `doi:10.1175/MWR-D-21-0229.1 `_ +.. [3] Anderson, J. L., 2023: A Quantile-Conserving Ensemble Filter Framework. + Part II: Regression of Observation Increments in a Probit and + Probability Integral Transformed Space. *Monthly Weather Review*, + **151**, 2759–2777, `doi:10.1175/MWR-D-23-0065.1 `_ +.. [4] Anderson, J. L., Riedel, C., Wieringa, M., Ishraque, F., Smith, M., Kershaw, H. + 2023: A Quantile-Conserving + Ensemble Filter Framework. Part III: Data Assimilation for Mixed Distributions + with Application to a Low-Order Tracer Advection Model. *Monthly Weather Review* + `[Manuscript submitted for publication] <_static/papers/QCEFF_3_submitted.pdf>`_ .. |spaghetti_square| image:: ./guide/images/DARTspaghettiSquare.gif :width: 100% @@ -244,6 +307,7 @@ References guide/high-level-da-workflows guide/dart-design-philosophy guide/important-capabilities-dart + guide/qceff_probit .. toctree:: :maxdepth: 2 @@ -361,6 +425,7 @@ References guide/DART_LAB/DART_LAB CLM-DART Tutorial WRF-DART Tutorial + guide/qceff-examples.rst .. toctree:: :maxdepth: 2 diff --git a/models/9var/work/input.nml b/models/9var/work/input.nml index cfe617f0ce..4ace01b342 100644 --- a/models/9var/work/input.nml +++ b/models/9var/work/input.nml @@ -1,3 +1,10 @@ +&probit_transform_nml + / + +&algorithm_info_nml + qceff_table_filename = '' + / + &perfect_model_obs_nml read_input_state_from_file = .true., single_file_in = .true. @@ -89,7 +96,6 @@ / &assim_tools_nml - filter_kind = 1, cutoff = 1000000.0, sort_obs_inc = .false., spread_restoration = .false., diff --git a/models/FESOM/work/input.nml b/models/FESOM/work/input.nml index 5334abff8c..f8b1d7304b 100644 --- a/models/FESOM/work/input.nml +++ b/models/FESOM/work/input.nml @@ -1,3 +1,10 @@ +&probit_transform_nml + / + +&algorithm_info_nml + qceff_table_filename = '' + / + &perfect_model_obs_nml read_input_state_from_file = .true. input_state_files = "ENS01.2009.oce.nc" @@ -87,7 +94,6 @@ / &assim_tools_nml - filter_kind = 1 cutoff = 0.005 sort_obs_inc = .false. spread_restoration = .false. diff --git a/models/LMDZ/work/input.nml b/models/LMDZ/work/input.nml index edbf8617c5..a3d72caa37 100644 --- a/models/LMDZ/work/input.nml +++ b/models/LMDZ/work/input.nml @@ -1,3 +1,10 @@ +&probit_transform_nml + / + +&algorithm_info_nml + qceff_table_filename = '' + / + &filter_nml async = 2, adv_ens_command = "./advance_model.csh", @@ -74,7 +81,6 @@ perturbation_amplitude = 0.2 / &assim_tools_nml - filter_kind = 1, cutoff = 0.2, sort_obs_inc = .false., spread_restoration = .false., diff --git a/models/MITgcm_annulus/work/input.nml b/models/MITgcm_annulus/work/input.nml index b7301418ea..e91333406d 100644 --- a/models/MITgcm_annulus/work/input.nml +++ b/models/MITgcm_annulus/work/input.nml @@ -1,3 +1,10 @@ +&probit_transform_nml + / + +&algorithm_info_nml + qceff_table_filename = '' + / + &perfect_model_obs_nml start_from_restart = .false., output_restart = .true., @@ -74,7 +81,6 @@ / &assim_tools_nml - filter_kind = 1, cutoff = 0.15, sort_obs_inc = .false., spread_restoration = .false., diff --git a/models/MITgcm_ocean/inputs/input.nml b/models/MITgcm_ocean/inputs/input.nml index 38a48e3553..351fcfffe6 100644 --- a/models/MITgcm_ocean/inputs/input.nml +++ b/models/MITgcm_ocean/inputs/input.nml @@ -1,3 +1,10 @@ +&probit_transform_nml + / + +&algorithm_info_nml + qceff_table_filename = '' + / + &location_nml horiz_dist_only = .true. nlon = 71 diff --git a/models/MITgcm_ocean/work/input.nml b/models/MITgcm_ocean/work/input.nml index 9bc7844826..f92ab19c39 100644 --- a/models/MITgcm_ocean/work/input.nml +++ b/models/MITgcm_ocean/work/input.nml @@ -1,3 +1,10 @@ +&probit_transform_nml + / + +&algorithm_info_nml + qceff_table_filename = '' + / + &perfect_model_obs_nml start_from_restart = .true. output_restart = .true. @@ -86,7 +93,6 @@ # in both lists, the same number of items &assim_tools_nml - filter_kind = 1 cutoff = 0.025 distribute_mean = .false. sort_obs_inc = .false. diff --git a/models/MOM6/work/input.nml b/models/MOM6/work/input.nml index 860e574da4..e442ab8c2a 100644 --- a/models/MOM6/work/input.nml +++ b/models/MOM6/work/input.nml @@ -1,3 +1,10 @@ +&probit_transform_nml + / + +&algorithm_info_nml + qceff_table_filename = '' + / + &perfect_model_obs_nml read_input_state_from_file = .true., single_file_in = .true. @@ -86,7 +93,6 @@ / &assim_tools_nml - filter_kind = 1, cutoff = 1000000.0 sort_obs_inc = .false., spread_restoration = .false., diff --git a/models/NAAPS/work/input.nml b/models/NAAPS/work/input.nml index 13f65c076f..92c3ecf452 100644 --- a/models/NAAPS/work/input.nml +++ b/models/NAAPS/work/input.nml @@ -1,5 +1,11 @@ +&probit_transform_nml + / + +&algorithm_info_nml + qceff_table_filename = '' + / + &assim_tools_nml - filter_kind = 1 cutoff = 0.03 sort_obs_inc = .false. spread_restoration = .false. diff --git a/models/NCOMMAS/work/input.nml b/models/NCOMMAS/work/input.nml index 79a7b30c8e..f8c477f70b 100644 --- a/models/NCOMMAS/work/input.nml +++ b/models/NCOMMAS/work/input.nml @@ -1,3 +1,10 @@ +&probit_transform_nml + / + +&algorithm_info_nml + qceff_table_filename = '' + / + &perfect_model_obs_nml start_from_restart = .true., output_restart = .true., @@ -73,7 +80,6 @@ # cutoff of 0.03 (radians) is about 200km &assim_tools_nml - filter_kind = 1, cutoff = 0.20, sort_obs_inc = .false., spread_restoration = .false., diff --git a/models/POP/work/input.nml b/models/POP/work/input.nml index 8863032412..6fb54e6f2c 100644 --- a/models/POP/work/input.nml +++ b/models/POP/work/input.nml @@ -1,3 +1,10 @@ +&probit_transform_nml + / + +&algorithm_info_nml + qceff_table_filename = '' + / + &perfect_model_obs_nml read_input_state_from_file = .true. single_file_in = .false. @@ -88,7 +95,6 @@ # if running a smaller pop case, use false to run faster. # &assim_tools_nml - filter_kind = 1 cutoff = 0.20 sort_obs_inc = .false. spread_restoration = .false. diff --git a/models/ROMS/work/input.nml b/models/ROMS/work/input.nml index 349bf35dcf..ad4ccd3aca 100644 --- a/models/ROMS/work/input.nml +++ b/models/ROMS/work/input.nml @@ -1,3 +1,10 @@ +&probit_transform_nml + / + +&algorithm_info_nml + qceff_table_filename = '' + / + &perfect_model_obs_nml read_input_state_from_file = .true. single_file_in = .false. @@ -87,7 +94,6 @@ &assim_tools_nml - filter_kind = 1 cutoff = 0.02 sort_obs_inc = .false. spread_restoration = .false. diff --git a/models/am2/work/input.nml b/models/am2/work/input.nml index 5f453d01cd..3643d228bc 100644 --- a/models/am2/work/input.nml +++ b/models/am2/work/input.nml @@ -1,3 +1,10 @@ +&probit_transform_nml + / + +&algorithm_info_nml + qceff_table_filename = '' + / + &perfect_model_obs_nml start_from_restart = .true., output_restart = .true., @@ -72,7 +79,6 @@ / &assim_tools_nml - filter_kind = 1, cutoff = 0.2, sort_obs_inc = .false., spread_restoration = .false., diff --git a/models/bgrid_solo/work/input.nml b/models/bgrid_solo/work/input.nml index 9d65871b00..eebd179d2c 100644 --- a/models/bgrid_solo/work/input.nml +++ b/models/bgrid_solo/work/input.nml @@ -1,3 +1,10 @@ +&probit_transform_nml + / + +&algorithm_info_nml + qceff_table_filename = '' + / + &perfect_model_obs_nml read_input_state_from_file = .true., single_file_in = .true. @@ -92,7 +99,6 @@ &assim_tools_nml - filter_kind = 1, cutoff = 0.20, sort_obs_inc = .false., spread_restoration = .false., diff --git a/models/cam-fv/work/input.nml b/models/cam-fv/work/input.nml index 3bbed78c6c..8fd9fa689b 100644 --- a/models/cam-fv/work/input.nml +++ b/models/cam-fv/work/input.nml @@ -33,18 +33,25 @@ ! inf_sd_from_restart inflation restart files from the values in inf*_initial ! if needed. +&probit_transform_nml + / + +&algorithm_info_nml + qceff_table_filename = '' + / + &filter_nml - input_state_files = '' input_state_file_list = 'cam_init_files' + input_state_files = '' single_file_in = .false. - perturb_from_single_instance = .true. + perturb_from_single_instance = .false. init_time_days = -1 init_time_seconds = -1 stages_to_write = 'forecast','output' output_state_files = '' - output_state_file_list = 'cam_init_files' + output_state_file_list = 'cam_out_files' output_mean = .true. output_sd = .true. output_members = .true. @@ -53,7 +60,7 @@ write_all_stages_at_end = .false. output_interval = 1 - ens_size = 3 + ens_size = 20 num_groups = 1 distributed_state = .true. @@ -72,7 +79,7 @@ obs_sequence_in_name = 'obs_seq.out' obs_sequence_out_name = 'obs_seq.final' num_output_obs_members = 3 - compute_posterior = .false. + compute_posterior = .true. trace_execution = .true. output_timestamps = .true. @@ -198,11 +205,12 @@ !======================================================================== &model_nml - cam_template_filename = 'caminput.nc' + !cam_template_filename = 'caminput.nc' + cam_template_filename = 'Rean_SST0Z_2020.cam_0001.i.2019-12-02-00000.nc' cam_phis_filename = 'cam_phis.nc' - custom_routine_to_generate_ensemble = .true. - fields_to_perturb = 'QTY_TEMPERATURE' - perturbation_amplitude = 0.1 + custom_routine_to_generate_ensemble = .false. + fields_to_perturb = 'QTY_TEMPERATURE', 'QTY_U_WIND_COMPONENT', 'QTY_V_WIND_COMPONENT', 'QTY_SURFACE_PRESSURE' + perturbation_amplitude = 1.0, 1.0, 1.0, 100.0 state_variables = 'T', 'QTY_TEMPERATURE', 'NA', 'NA', 'UPDATE' 'US', 'QTY_U_WIND_COMPONENT', 'NA', 'NA', 'UPDATE' 'VS', 'QTY_V_WIND_COMPONENT', 'NA', 'NA', 'UPDATE' @@ -316,20 +324,23 @@ ! 'SABER_TEMPERATURE', &obs_kind_nml - assimilate_these_obs_types = 'RADIOSONDE_U_WIND_COMPONENT', + assimilate_these_obs_types = 'RADIOSONDE_U_WIND_COMPONENT', 'RADIOSONDE_V_WIND_COMPONENT', - 'RADIOSONDE_TEMPERATURE', - 'AIRCRAFT_U_WIND_COMPONENT', - 'AIRCRAFT_V_WIND_COMPONENT', - 'AIRCRAFT_TEMPERATURE', - 'ACARS_U_WIND_COMPONENT', - 'ACARS_V_WIND_COMPONENT', - 'ACARS_TEMPERATURE', - 'SAT_U_WIND_COMPONENT', - 'SAT_V_WIND_COMPONENT', - 'GPSRO_REFRACTIVITY' - - evaluate_these_obs_types = 'RADIOSONDE_SPECIFIC_HUMIDITY', + 'RADIOSONDE_TEMPERATURE' +! assimilate_these_obs_types = 'RADIOSONDE_U_WIND_COMPONENT', +! 'RADIOSONDE_V_WIND_COMPONENT', +! 'RADIOSONDE_TEMPERATURE', +! 'AIRCRAFT_U_WIND_COMPONENT', +! 'AIRCRAFT_V_WIND_COMPONENT', +! 'AIRCRAFT_TEMPERATURE', +! 'ACARS_U_WIND_COMPONENT', +! 'ACARS_V_WIND_COMPONENT', +! 'ACARS_TEMPERATURE', +! 'SAT_U_WIND_COMPONENT', +! 'SAT_V_WIND_COMPONENT', +! 'GPSRO_REFRACTIVITY' + +! evaluate_these_obs_types = 'RADIOSONDE_SPECIFIC_HUMIDITY', / @@ -349,7 +360,6 @@ &assim_tools_nml - filter_kind = 1 cutoff = 0.15 sort_obs_inc = .false. spread_restoration = .false. diff --git a/models/cam-se/work/input.nml b/models/cam-se/work/input.nml index 52dc5ca5a5..621ecffcb9 100644 --- a/models/cam-se/work/input.nml +++ b/models/cam-se/work/input.nml @@ -28,6 +28,13 @@ ! inf_sd_from_restart inflation restart files from the values in inf*_initial ! if needed. +&probit_transform_nml + / + +&algorithm_info_nml + qceff_table_filename = '' + / + &filter_nml input_state_files = '' input_state_file_list = 'cam_init_files' @@ -344,7 +351,6 @@ &assim_tools_nml - filter_kind = 1 cutoff = 0.15 sort_obs_inc = .false. spread_restoration = .false. diff --git a/models/cice/work/input.nml b/models/cice/work/input.nml index 9fe096838a..6b061b9954 100644 --- a/models/cice/work/input.nml +++ b/models/cice/work/input.nml @@ -1,3 +1,10 @@ +&probit_transform_nml + / + +&algorithm_info_nml + qceff_table_filename = '' + / + &perfect_model_obs_nml read_input_state_from_file = .true. write_output_state_to_file = .true. @@ -76,7 +83,6 @@ # cutoff of 0.03 (radians) is about 200km &assim_tools_nml - filter_kind = 1 cutoff = 0.05 sort_obs_inc = .false. spread_restoration = .false. diff --git a/models/clm/work/input.nml b/models/clm/work/input.nml index daa8fb4d98..8fc480e653 100644 --- a/models/clm/work/input.nml +++ b/models/clm/work/input.nml @@ -1,3 +1,10 @@ +&probit_transform_nml + / + +&algorithm_info_nml + qceff_table_filename = '' + / + &perfect_model_obs_nml read_input_state_from_file = .true. write_output_state_to_file = .false. @@ -114,7 +121,6 @@ # cutoff of 0.03 (radians) is about 200km &assim_tools_nml - filter_kind = 1 cutoff = 0.05 sort_obs_inc = .false. spread_restoration = .false. diff --git a/models/cm1/work/input.nml b/models/cm1/work/input.nml index 218f34f536..465a6d1d82 100644 --- a/models/cm1/work/input.nml +++ b/models/cm1/work/input.nml @@ -2,6 +2,13 @@ ! For high-resolution models with large DART states, ! use 'distributed_state = .true.' +&probit_transform_nml + / + +&algorithm_info_nml + qceff_table_filename = '' + / + &filter_nml async = 2 adv_ens_command = 'advance_model.csh' @@ -250,7 +257,6 @@ &assim_tools_nml adaptive_localization_threshold = -1 cutoff = 15000.0 - filter_kind = 1 print_every_nth_obs = 100 rectangular_quadrature = .true. sampling_error_correction = .false. diff --git a/models/coamps_nest/templates/EXPERIMENT_EXAMPLE/input.nml b/models/coamps_nest/templates/EXPERIMENT_EXAMPLE/input.nml index 5636b43894..2bc0b72bcf 100644 --- a/models/coamps_nest/templates/EXPERIMENT_EXAMPLE/input.nml +++ b/models/coamps_nest/templates/EXPERIMENT_EXAMPLE/input.nml @@ -1,3 +1,10 @@ +&probit_transform_nml + / + +&algorithm_info_nml + qceff_table_filename = '' + / + &perfect_model_obs_nml start_from_restart = .true., output_restart = .true., @@ -60,7 +67,6 @@ &assim_tools_nml - filter_kind = 1, cutoff = 0.125, sort_obs_inc = .false., spread_restoration = .false., diff --git a/models/dynamo/work/input.nml b/models/dynamo/work/input.nml index c3c0921059..290276fdaf 100644 --- a/models/dynamo/work/input.nml +++ b/models/dynamo/work/input.nml @@ -1,3 +1,10 @@ +&probit_transform_nml + / + +&algorithm_info_nml + qceff_table_filename = '' + / + &perfect_model_obs_nml start_from_restart = .false., output_restart = .true., @@ -74,7 +81,6 @@ / &assim_tools_nml - filter_kind = 1, cutoff = 0.00001, sort_obs_inc = .false., spread_restoration = .false., diff --git a/models/forced_barot/work/input.nml b/models/forced_barot/work/input.nml index 2ad26c71b6..9f17e2bb1f 100644 --- a/models/forced_barot/work/input.nml +++ b/models/forced_barot/work/input.nml @@ -1,3 +1,10 @@ +&probit_transform_nml + / + +&algorithm_info_nml + qceff_table_filename = '' + / + &perfect_model_obs_nml start_from_restart = .true., output_restart = .true., @@ -76,7 +83,6 @@ / &assim_tools_nml - filter_kind = 1, cutoff = 0.02, sort_obs_inc = .false., spread_restoration = .false., diff --git a/models/forced_lorenz_96/work/input.nml b/models/forced_lorenz_96/work/input.nml index e11a177c5e..57fa73ea4a 100644 --- a/models/forced_lorenz_96/work/input.nml +++ b/models/forced_lorenz_96/work/input.nml @@ -1,3 +1,10 @@ +&probit_transform_nml + / + +&algorithm_info_nml + qceff_table_filename = '' + / + &perfect_model_obs_nml read_input_state_from_file = .true., single_file_in = .true. @@ -87,7 +94,6 @@ / &assim_tools_nml - filter_kind = 1, cutoff = 1000000.0, sort_obs_inc = .false., spread_restoration = .false., diff --git a/models/gitm/work/input.nml b/models/gitm/work/input.nml index f3afac2137..5e5d47f4ef 100644 --- a/models/gitm/work/input.nml +++ b/models/gitm/work/input.nml @@ -1,3 +1,9 @@ +&probit_transform_nml + / + +&algorithm_info_nml + qceff_table_filename = '' + / &filter_nml input_state_files = '' @@ -84,7 +90,6 @@ # cutoff of 0.03 (radians) is about 200km &assim_tools_nml - filter_kind = 1, cutoff = 0.60, sort_obs_inc = .false., spread_restoration = .false., diff --git a/models/ikeda/work/input.nml b/models/ikeda/work/input.nml index 57f57246c3..2f65e7217f 100644 --- a/models/ikeda/work/input.nml +++ b/models/ikeda/work/input.nml @@ -1,3 +1,10 @@ +&probit_transform_nml + / + +&algorithm_info_nml + qceff_table_filename = '' + / + &perfect_model_obs_nml read_input_state_from_file = .false., single_file_in = .true. @@ -86,7 +93,6 @@ / &assim_tools_nml - filter_kind = 1, cutoff = 1000000.0, sort_obs_inc = .false., spread_restoration = .false., diff --git a/models/lorenz_04/work/input.nml b/models/lorenz_04/work/input.nml index 19b372df2a..2385b45b64 100644 --- a/models/lorenz_04/work/input.nml +++ b/models/lorenz_04/work/input.nml @@ -1,3 +1,10 @@ +&probit_transform_nml + / + +&algorithm_info_nml + qceff_table_filename = '' + / + &perfect_model_obs_nml read_input_state_from_file = .true., single_file_in = .true. @@ -84,7 +91,6 @@ / &assim_tools_nml - filter_kind = 1, cutoff = 1000000.0, sort_obs_inc = .false., spread_restoration = .false., diff --git a/models/lorenz_63/work/input.nml b/models/lorenz_63/work/input.nml index 90504677f9..2a020e875b 100644 --- a/models/lorenz_63/work/input.nml +++ b/models/lorenz_63/work/input.nml @@ -1,3 +1,10 @@ +&probit_transform_nml + / + +&algorithm_info_nml + qceff_table_filename = '' + / + &perfect_model_obs_nml read_input_state_from_file = .true., single_file_in = .true. @@ -86,7 +93,6 @@ / &assim_tools_nml - filter_kind = 1, cutoff = 1000000.0 sort_obs_inc = .false., spread_restoration = .false., diff --git a/models/lorenz_84/work/input.nml b/models/lorenz_84/work/input.nml index 6634c45d1f..2c8baa0680 100644 --- a/models/lorenz_84/work/input.nml +++ b/models/lorenz_84/work/input.nml @@ -1,3 +1,10 @@ +&probit_transform_nml + / + +&algorithm_info_nml + qceff_table_filename = '' + / + &perfect_model_obs_nml read_input_state_from_file = .true., single_file_in = .true. @@ -89,7 +96,6 @@ / &assim_tools_nml - filter_kind = 1, cutoff = 1000000.0, sort_obs_inc = .false., spread_restoration = .false., diff --git a/models/lorenz_96/work/input.nml b/models/lorenz_96/work/input.nml index 39bbef5f02..c1ba2dca4e 100644 --- a/models/lorenz_96/work/input.nml +++ b/models/lorenz_96/work/input.nml @@ -1,3 +1,10 @@ +&probit_transform_nml + / + +&algorithm_info_nml + qceff_table_filename = '' + / + &perfect_model_obs_nml read_input_state_from_file = .true., single_file_in = .true. @@ -86,7 +93,6 @@ / &assim_tools_nml - filter_kind = 1, cutoff = 0.02, sort_obs_inc = .false., spread_restoration = .false., diff --git a/models/lorenz_96_2scale/work/input.nml b/models/lorenz_96_2scale/work/input.nml index e7edc3fa92..024038aeca 100644 --- a/models/lorenz_96_2scale/work/input.nml +++ b/models/lorenz_96_2scale/work/input.nml @@ -1,3 +1,10 @@ +&probit_transform_nml + / + +&algorithm_info_nml + qceff_table_filename = '' + / + &perfect_model_obs_nml read_input_state_from_file = .true., single_file_in = .true. @@ -85,7 +92,6 @@ / &assim_tools_nml - filter_kind = 1, cutoff = 1000000.0, sort_obs_inc = .false., spread_restoration = .false., diff --git a/models/lorenz_96_tracer_advection/model_mod.f90 b/models/lorenz_96_tracer_advection/model_mod.f90 index fac7d63552..dbe41240c8 100644 --- a/models/lorenz_96_tracer_advection/model_mod.f90 +++ b/models/lorenz_96_tracer_advection/model_mod.f90 @@ -17,11 +17,11 @@ module model_mod nmlfileunit, find_namelist_in_file, & check_namelist_read, E_ERR, error_handler -use location_io_mod, only : nc_write_location_atts, nc_write_location +use location_io_mod, only : nc_write_location_atts, nc_write_location -use netcdf_utilities_mod, only : nc_add_global_attribute, nc_synchronize_file, & - nc_add_global_creation_time, nc_begin_define_mode, & - nc_end_define_mode +use netcdf_utilities_mod, only : nc_add_global_attribute, nc_synchronize_file, & + nc_add_global_creation_time, nc_begin_define_mode, & + nc_end_define_mode use obs_kind_mod, only : QTY_STATE_VARIABLE, QTY_TRACER_SOURCE, & QTY_TRACER_CONCENTRATION, get_name_for_quantity @@ -34,7 +34,8 @@ module model_mod use distributed_state_mod, only : get_state -use state_structure_mod, only : add_domain, add_dimension_to_variable, finished_adding_domain, state_structure_info +use state_structure_mod, only : add_domain, add_dimension_to_variable, & + finished_adding_domain, state_structure_info use default_model_mod, only : end_model, nc_write_model_vars, & init_time @@ -48,25 +49,25 @@ module model_mod ! arguments because they will be called *from* other DART code. !> required routines with code in this module -public :: get_model_size, & - get_state_meta_data, & - model_interpolate, & +public :: get_model_size, & + get_state_meta_data, & + model_interpolate, & shortest_time_between_assimilations, & - static_init_model, & - init_conditions, & - adv_1step, & + static_init_model, & + init_conditions, & + adv_1step, & nc_write_model_atts !> required routines where code is in other modules -public :: pert_model_copies, & - nc_write_model_vars, & - init_time, & - get_close_obs, & - get_close_state, & - end_model, & - convert_vertical_obs, & +public :: pert_model_copies, & + nc_write_model_vars, & + init_time, & + get_close_obs, & + get_close_state, & + end_model, & + convert_vertical_obs, & convert_vertical_state, & - read_model_time, & + read_model_time, & write_model_time ! version controlled file description for error handling, do not edit @@ -77,25 +78,41 @@ module model_mod ! Namelist with default values -integer(i8) :: model_size = 120 -real(r8) :: forcing = 8.00_r8 -real(r8) :: delta_t = 0.05_r8 -integer :: time_step_days = 0 -integer :: time_step_seconds = 3600 +integer(i8) :: model_size = 120 +real(r8) :: forcing = 8.00_r8 +real(r8) :: delta_t = 0.05_r8 -namelist /model_nml/ model_size, forcing, delta_t, time_step_days, time_step_seconds - -! Tracer parameters +! Tracer model parameters with default values ! mean velocity -real(r8) :: mean_velocity = 25.00_r8 +real(r8) :: mean_velocity = 0.00_r8 ! velocity normalization -real(r8) :: pert_velocity_multiplier = 5.00_r8 +real(r8) :: pert_velocity_multiplier = 5.00_r8 ! diffusion everywhere -real(r8) :: diffusion_coef = 0.00_r8 -! amount injected per unit time -real(r8) :: source_rate = 100.00_r8 -! include an exponential sink -real(r8) :: e_folding = 1.00_r8 +real(r8) :: diffusion_coef = 0.00_r8 +! include an exponential sink rate +real(r8) :: e_folding = 0.25_r8 +! Also include a fixed sink so tracer can get to 0 +real(r8) :: sink_rate = 0.1_r8 +! Tracer source model parameters +! Amount injected per unit time; This is not currently implemented +real(r8) :: source_rate = 100.00_r8 +real(r8) :: point_tracer_source_rate = 5.0_r8 + +! Allows having negative tracer values to test bounded above filter algorithms +logical :: positive_tracer = .true. + +! Allows testing non-zero bounds above +logical :: bound_above_is_one = .false. + +integer :: time_step_days = 0 +integer :: time_step_seconds = 3600 + +namelist /model_nml/ model_size, forcing, delta_t, mean_velocity, & + pert_velocity_multiplier, diffusion_coef, e_folding, & + sink_rate, source_rate, point_tracer_source_rate, & + positive_tracer, bound_above_is_one, & + time_step_days, time_step_seconds + ! number state variable quantities integer, parameter :: NVARS = 3 ! QTY_STATE_VARIABLE, QTY_TRACER_CONCENTRATION, QTY_TRACER_SOURCE @@ -111,8 +128,6 @@ module model_mod ! Module storage for a random sequence for perturbing a single initial state type(random_seq_type) :: random_seq -logical :: random_seq_init = .true. - contains @@ -125,21 +140,28 @@ module model_mod subroutine adv_1step(x, time) -real(r8), intent(inout) :: x(:) ! for a grid_size = 40, model_size = 120: +real(r8), intent(inout) :: x(:) ! for a grid_size = 40, model_size = 120: ! positions (1-40) tracer (41-80) and source (81-120) - ! this is generalizable to any model size that is a - ! multiple of 3 type(time_type), intent(inout) :: time -real(r8) :: velocity, target_loc, frac, ratio -integer(r8) :: low, hi, up, down, i, f -real(i8), dimension(grid_size) :: x1, x2, x3, x4, x_new, dx, inter, q_diff, q_new, q +real(r8) :: velocity, target_loc, frac, ratio +integer(r8) :: low, hi, up, down, i +real(r8), dimension(grid_size) :: x1, x2, x3, x4, x_new, dx, inter, q_diff, q_new, q + +! Test for tracer with upper bound of 1; Subtract 1 when entering here, then add it back on +if(bound_above_is_one) x(grid_size + 1:2*grid_size) = x(grid_size + 1:2*grid_size) - 1.0_r8 q = x(grid_size + 1 :2*grid_size) ! QTY_TRACER_CONCENTRATION ! Doing an upstream semi-lagrangian advection for q for each grid point do i = 1, grid_size ! Get the target point - velocity = mean_velocity + x(i)*pert_velocity_multiplier + velocity = (mean_velocity + x(i))*pert_velocity_multiplier + + ! Bail out if the velocity number of grid points per dt exceeds the whole domain size + if(abs(velocity * delta_t) > grid_size) then + call error_handler(E_ERR, 'adv_1step', 'Lagrangian Velocity ridiculously large') + endif + target_loc = i - velocity*delta_t ! Get the bounding grid point low = floor(target_loc) @@ -147,6 +169,7 @@ subroutine adv_1step(x, time) frac = target_loc - low ! Assume for now that we are not looking upstream for multiple revolutions + ! consistent with error failure above if (low < 1) then low = low + grid_size @@ -173,17 +196,25 @@ subroutine adv_1step(x, time) if (up > grid_size) then up = up - grid_size end if - q_diff(i) = diffusion_coef * (q_new(down) + q_new(up) - 2*q_new(i)) + ! Should be sure this is the right way to time normalize + q_diff(i) = diffusion_coef * delta_t * (q_new(down) + q_new(up) - 2*q_new(i)) end do q_new = q_new + q_diff*delta_t -! Add source following the source input +! Add source q_new = x((2*grid_size)+1 : model_size)*delta_t + q_new ! Add exponential sinks at every grid point ratio = exp((-1)*e_folding*delta_t) q_new = ratio*q_new +! Add in an additional uniform sink so that stuff can get to zero for tests +if(positive_tracer) then + q_new = max(0.0_r8, q_new - sink_rate * delta_t) +else + q_new = min(0.0_r8, q_new + sink_rate * delta_t) +endif + x(grid_size+1:2*(grid_size)) = q_new ! RK4 solver for the lorenz-96 equations @@ -212,6 +243,8 @@ subroutine adv_1step(x, time) x(1: grid_size) = x_new +! Test for tracer with upper bound of 1; Subtract 1 when entering, then add it back on +if(bound_above_is_one) x(grid_size + 1:2*grid_size) = x(grid_size + 1:2*grid_size) + 1.0_r8 end subroutine adv_1step @@ -223,7 +256,7 @@ subroutine comp_dt(x, dt) real(r8), intent(in) :: x(grid_size) real(r8), intent(out) :: dt(grid_size) -integer :: j, jp1, jm1, jm2, ms +integer :: j, jp1, jm1, jm2 do j = 1, grid_size jp1 = j + 1 @@ -244,8 +277,8 @@ end subroutine comp_dt subroutine static_init_model() -real(r8) :: x_loc, delta_loc -integer :: i, dom_id, var_id +real(r8) :: delta_loc +integer :: i, dom_id character(20) :: string1 ! Do any initial setup needed, including reading the namelist values @@ -296,10 +329,18 @@ subroutine init_conditions(x) real(r8), intent(out) :: x(:) -x = 0 -x(1:grid_size) = 0.0_r8 + +! Set all variables, winds, tracer concentration, and source to 0 +x(:) = 0.0_r8 +! Add a single perturbation to L96 state (winds) to generate evolution x(1) = 0.1_r8 -x(grid_size*2 + 1) = 100 +! For these tests, single tracer source at the first grid point +if(positive_tracer) then + x(grid_size*2 + 1) = point_tracer_source_rate +else + ! Make it negative if testing negative tracers + x(grid_size*2 + 1) = -point_tracer_source_rate +endif end subroutine init_conditions @@ -439,7 +480,7 @@ subroutine initialize() ! Output the namelist values if requested if (do_nml_file()) write(nmlfileunit, nml=model_nml) -if (do_nml_term()) write( * , nml=model_nml) +if (do_nml_term()) write( * , nml= model_nml) end subroutine initialize @@ -479,13 +520,16 @@ subroutine pert_model_copies(state_ens_handle, ens_size, pert_amp, interf_provid if (var_type == QTY_STATE_VARIABLE) then do j=1,ens_size - state_ens_handle%copies(j, i) = random_gaussian(random_seq, state_ens_handle%copies(j, i), pert_amp) + state_ens_handle%copies(j, i) = random_gaussian(random_seq, & + state_ens_handle%copies(j, i), pert_amp) end do + elseif(var_type == QTY_TRACER_CONCENTRATION) then + ! For now, can just let all ensemble members be identical + ! Spread will be generated by the chaotic flow field !Perturbing all source grid points else if (var_type == QTY_TRACER_SOURCE) then - do j=1,ens_size - state_ens_handle%copies(j, i) = state_ens_handle%copies(j, i) + random_gaussian(random_seq, 0.00_r8, 50.00_r8) - end do + ! For now, can just keep source constant so it will not evolve + ! Need to perturb to do source estimation end if end do @@ -516,7 +560,12 @@ subroutine nc_write_model_atts(ncid, domain_id) call nc_add_global_attribute(ncid, "model_forcing", forcing ) call nc_add_global_attribute(ncid, "model_delta_t", delta_t ) call nc_add_global_attribute(ncid, "source_rate", source_rate) +call nc_add_global_attribute(ncid, "sink_rate", sink_rate) call nc_add_global_attribute(ncid, "exponential_sink_folding", e_folding) +call nc_add_global_attribute(ncid, "mean_velocity", mean_velocity) +call nc_add_global_attribute(ncid, "pert_velocity_multiplier", pert_velocity_multiplier) +call nc_add_global_attribute(ncid, "diffusion_coef", diffusion_coef) + call nc_write_location_atts(ncid, grid_size) call nc_end_define_mode(ncid) diff --git a/models/lorenz_96_tracer_advection/model_mod.nml b/models/lorenz_96_tracer_advection/model_mod.nml index 224428718b..18473892c7 100644 --- a/models/lorenz_96_tracer_advection/model_mod.nml +++ b/models/lorenz_96_tracer_advection/model_mod.nml @@ -1,7 +1,15 @@ &model_nml - model_size = 120, - forcing = 8.00, - delta_t = 0.05, - time_step_days = 0, - time_step_seconds = 3600 / + model_size = 120, + forcing = 8.0, + delta_t = 0.05, + mean_velocity = 0.0, + pert_velocity_multiplier = 5.0, + diffusion_coef = 0.0, + e_folding = 0.25, + sink_rate = 0.1, + source_rate = 100.0, + point_tracer_source_rate = 5.0, + positive_tracer = .true., + time_step_days = 0, + time_step_seconds = 3600 / diff --git a/models/lorenz_96_tracer_advection/work/all_bnrhf_qceff_table.csv b/models/lorenz_96_tracer_advection/work/all_bnrhf_qceff_table.csv new file mode 100644 index 0000000000..e920cf5cea --- /dev/null +++ b/models/lorenz_96_tracer_advection/work/all_bnrhf_qceff_table.csv @@ -0,0 +1,5 @@ +QCEFF table version: 1,obs_error_info,,,,probit_inflation,,,,,probit_state,,,,,probit_extended_state,,,,,obs_inc_info,,,, +QTY_NAME:,bounded_below,bounded_above,lower_bound,upper_bound,dist_type,bounded_below,bounded_above,lower_bound,upper_bound,dist_type,bounded_below,bounded_above,lower_bound,upper_bound,dist_type,bounded_below,bounded_above,lower_bound,upper_bound,filter_kind,bounded_below,bounded_above,lower_bound,upper_bound +QTY_STATE_VARIABLE,.false.,.false.,-888888,-888888,BOUNDED_NORMAL_RH_DISTRIBUTION,.false.,.false.,-888888,-888888,BOUNDED_NORMAL_RH_DISTRIBUTION,.false.,.false.,-888888,-888888,BOUNDED_NORMAL_RH_DISTRIBUTION,.false.,.false.,-888888,-888888,BOUNDED_NORMAL_RHF,.false.,.false.,-888888,-888888 +QTY_TRACER_CONCENTRATION,.true.,.false.,0,-888888,BOUNDED_NORMAL_RH_DISTRIBUTION,.true.,.false.,0,-888888,BOUNDED_NORMAL_RH_DISTRIBUTION,.true.,.false.,0,-888888,BOUNDED_NORMAL_RH_DISTRIBUTION,.true.,.false.,0,-888888,BOUNDED_NORMAL_RHF,.true.,.false.,0,-888888 +QTY_TRACER_SOURCE,.true.,.false.,0,-888888,BOUNDED_NORMAL_RH_DISTRIBUTION,.true.,.false.,0,-888888,BOUNDED_NORMAL_RH_DISTRIBUTION,.true.,.false.,0,-888888,BOUNDED_NORMAL_RH_DISTRIBUTION,.true.,.false.,0,-888888,BOUNDED_NORMAL_RHF,.true.,.false.,0,-888888 diff --git a/models/lorenz_96_tracer_advection/work/all_eakf_qceff_table.csv b/models/lorenz_96_tracer_advection/work/all_eakf_qceff_table.csv new file mode 100644 index 0000000000..2266755f54 --- /dev/null +++ b/models/lorenz_96_tracer_advection/work/all_eakf_qceff_table.csv @@ -0,0 +1,5 @@ +QCEFF table version: 1,obs_error_info,,,,probit_inflation,,,,,probit_state,,,,,probit_extended_state,,,,,obs_inc_info,,,, +QTY_NAME:,bounded_below,bounded_above,lower_bound,upper_bound,dist_type,bounded_below,bounded_above,lower_bound,upper_bound,dist_type,bounded_below,bounded_above,lower_bound,upper_bound,dist_type,bounded_below,bounded_above,lower_bound,upper_bound,filter_kind,bounded_below,bounded_above,lower_bound,upper_bound +QTY_STATE_VARIABLE,.false.,.false.,-888888,-888888,NORMAL_DISTRIBUTION,.false.,.false.,-888888,-888888,NORMAL_DISTRIBUTION,.false.,.false.,-888888,-888888,NORMAL_DISTRIBUTION,.false.,.false.,-888888,-888888,EAKF,.false.,.false.,-888888,-888888 +QTY_TRACER_CONCENTRATION,.true.,.false.,0,-888888,NORMAL_DISTRIBUTION,.true.,.false.,0,-888888,NORMAL_DISTRIBUTION,.true.,.false.,0,-888888,NORMAL_DISTRIBUTION,.true.,.false.,0,-888888,EAKF,.true.,.false.,0,-888888 +QTY_TRACER_SOURCE,.true.,.false.,0,-888888,NORMAL_DISTRIBUTION,.true.,.false.,0,-888888,NORMAL_DISTRIBUTION,.true.,.false.,0,-888888,NORMAL_DISTRIBUTION,.true.,.false.,0,-888888,EAKF,.true.,.false.,0,-888888 diff --git a/models/lorenz_96_tracer_advection/work/create_obs_sequence_input b/models/lorenz_96_tracer_advection/work/create_obs_sequence_input new file mode 100644 index 0000000000..78c2586de3 --- /dev/null +++ b/models/lorenz_96_tracer_advection/work/create_obs_sequence_input @@ -0,0 +1,406 @@ +80 +0 +0 +0 +1 +-1 +0 0 +10.01 +0 +2 +-1 +0 0 +9.99 +0 +1 +-1 +0 0 +10.01 +0 +2 +-1 +0 0 +9.99 +0 +1 +-1 +0 0 +10.01 +0 +2 +-1 +0 0 +9.99 +0 +1 +-1 +0 0 +10.01 +0 +2 +-1 +0 0 +9.99 +0 +1 +-1 +0 0 +10.01 +0 +2 +-1 +0 0 +9.99 +0 +1 +-1 +0 0 +10.01 +0 +2 +-1 +0 0 +9.99 +0 +1 +-1 +0 0 +10.01 +0 +2 +-1 +0 0 +9.99 +0 +1 +-1 +0 0 +10.01 +0 +2 +-1 +0 0 +9.99 +0 +1 +-1 +0 0 +10.01 +0 +2 +-1 +0 0 +9.99 +0 +1 +-1 +0 0 +10.01 +0 +2 +-1 +0 0 +9.99 +0 +1 +-1 +0 0 +10.01 +0 +2 +-1 +0 0 +9.99 +0 +1 +-1 +0 0 +10.01 +0 +2 +-1 +0 0 +9.99 +0 +1 +-1 +0 0 +10.01 +0 +2 +-1 +0 0 +9.99 +0 +1 +-1 +0 0 +10.01 +0 +2 +-1 +0 0 +9.99 +0 +1 +-1 +0 0 +10.01 +0 +2 +-1 +0 0 +9.99 +0 +1 +-1 +0 0 +10.01 +0 +2 +-1 +0 0 +9.99 +0 +1 +-1 +0 0 +10.01 +0 +2 +-1 +0 0 +9.99 +0 +1 +-1 +0 0 +10.01 +0 +2 +-1 +0 0 +9.99 +0 +1 +-1 +0 0 +10.01 +0 +2 +-1 +0 0 +9.99 +0 +1 +-1 +0 0 +10.01 +0 +2 +-1 +0 0 +9.99 +0 +1 +-1 +0 0 +10.01 +0 +2 +-1 +0 0 +9.99 +0 +1 +-1 +0 0 +10.01 +0 +2 +-1 +0 0 +9.99 +0 +1 +-1 +0 0 +10.01 +0 +2 +-1 +0 0 +9.99 +0 +1 +-1 +0 0 +10.01 +0 +2 +-1 +0 0 +9.99 +0 +1 +-1 +0 0 +10.01 +0 +2 +-1 +0 0 +9.99 +0 +1 +-1 +0 0 +10.01 +0 +2 +-1 +0 0 +9.99 +0 +1 +-1 +0 0 +10.01 +0 +2 +-1 +0 0 +9.99 +0 +1 +-1 +0 0 +10.01 +0 +2 +-1 +0 0 +9.99 +0 +1 +-1 +0 0 +10.01 +0 +2 +-1 +0 0 +9.99 +0 +1 +-1 +0 0 +10.01 +0 +2 +-1 +0 0 +9.99 +0 +1 +-1 +0 0 +10.01 +0 +2 +-1 +0 0 +9.99 +0 +1 +-1 +0 0 +10.01 +0 +2 +-1 +0 0 +9.99 +0 +1 +-1 +0 0 +10.01 +0 +2 +-1 +0 0 +9.99 +0 +1 +-1 +0 0 +10.01 +0 +2 +-1 +0 0 +9.99 +0 +1 +-1 +0 0 +10.01 +0 +2 +-1 +0 0 +9.99 +0 +1 +-1 +0 0 +10.01 +0 +2 +-1 +0 0 +9.99 +0 +1 +-1 +0 0 +10.01 +0 +2 +-1 +0 0 +9.99 +0 +1 +-1 +0 0 +10.01 +0 +2 +-1 +0 0 +9.99 +0 +1 +-1 +0 0 +10.01 +0 +2 +-1 +0 0 +9.99 +0 +1 +-1 +0 0 +10.01 +0 +2 +-1 +0 0 +9.99 +set_def.out + + diff --git a/models/lorenz_96_tracer_advection/work/input.nml b/models/lorenz_96_tracer_advection/work/input.nml index c52c524490..955e328a72 100644 --- a/models/lorenz_96_tracer_advection/work/input.nml +++ b/models/lorenz_96_tracer_advection/work/input.nml @@ -1,3 +1,10 @@ +&probit_transform_nml + / + +&algorithm_info_nml + qceff_table_filename = '' + / + &perfect_model_obs_nml read_input_state_from_file = .false., single_file_in = .true. @@ -29,7 +36,7 @@ &filter_nml single_file_in = .true., - input_state_files = 'filter_input.nc' + input_state_files = 'perfect_input.nc' input_state_file_list = '' stages_to_write = 'preassim', 'analysis', 'output' @@ -40,14 +47,14 @@ output_interval = 1, output_members = .true. - num_output_state_members = 20, + num_output_state_members = 80, output_mean = .true. output_sd = .true. - ens_size = 40, + ens_size = 80, num_groups = 1, - perturb_from_single_instance = .false., - perturbation_amplitude = 0.2, + perturb_from_single_instance = .true., + perturbation_amplitude = 0.01, distributed_state = .true. async = 0, @@ -55,7 +62,7 @@ obs_sequence_in_name = "obs_seq.out", obs_sequence_out_name = "obs_seq.final", - num_output_obs_members = 20, + num_output_obs_members = 80, init_time_days = 0, init_time_seconds = 0, first_obs_days = -1, @@ -63,13 +70,13 @@ last_obs_days = -1, last_obs_seconds = -1, - inf_flavor = 2, 0, + inf_flavor = 5, 0, inf_initial_from_restart = .false., .false., inf_sd_initial_from_restart = .false., .false., inf_deterministic = .true., .true., inf_initial = 1.0, 1.0, - inf_lower_bound = 1.0, 1.0, - inf_upper_bound = 1.2, 1.2, + inf_lower_bound = 0.0, 1.0, + inf_upper_bound = 1000.0, 1.2, inf_damping = 0.9, 1.0, inf_sd_initial = 0.6, 0.0, inf_sd_lower_bound = 0.6, 0.0, @@ -82,13 +89,11 @@ silence = .false., / - &ensemble_manager_nml / &assim_tools_nml - filter_kind = 1, - cutoff = 0.2, + cutoff = 1000000.0 sort_obs_inc = .false., spread_restoration = .false., sampling_error_correction = .false., @@ -118,16 +123,25 @@ &obs_kind_nml assimilate_these_obs_types = 'RAW_STATE_VARIABLE', - 'RAW_TRACER_CONCENTRATION', - 'RAW_TRACER_SOURCE' + 'RAW_TRACER_CONCENTRATION' + evaluate_these_obs_types = 'RAW_TRACER_SOURCE' / &model_nml - model_size = 120, - forcing = 8.00, - delta_t = 0.05, - time_step_days = 0, - time_step_seconds = 3600, + model_size = 120, + forcing = 8.0, + delta_t = 0.05, + mean_velocity = 0.0, + pert_velocity_multiplier = 5.0, + diffusion_coef = 0.0, + e_folding = 0.25, + sink_rate = 0.1, + source_rate = 100.0, + point_tracer_source_rate = 5.0, + positive_tracer = .true., + bound_above_is_one = .false. + time_step_days = 0, + time_step_seconds = 3600, / &utilities_nml diff --git a/models/lorenz_96_tracer_advection/work/neg_qceff_table.csv b/models/lorenz_96_tracer_advection/work/neg_qceff_table.csv new file mode 100644 index 0000000000..b6de663a76 --- /dev/null +++ b/models/lorenz_96_tracer_advection/work/neg_qceff_table.csv @@ -0,0 +1,5 @@ +QCEFF table version: 1,obs_error_info,,,,probit_inflation,,,,,probit_state,,,,,probit_extended_state,,,,,obs_inc_info,,,, +QTY_NAME:,bounded_below,bounded_above,lower_bound,upper_bound,dist_type,bounded_below,bounded_above,lower_bound,upper_bound,dist_type,bounded_below,bounded_above,lower_bound,upper_bound,dist_type,bounded_below,bounded_above,lower_bound,upper_bound,filter_kind,bounded_below,bounded_above,lower_bound,upper_bound +QTY_STATE_VARIABLE,.false.,.false.,-888888,-888888,BOUNDED_NORMAL_RH_DISTRIBUTION,.false.,.false.,-888888,-888888,BOUNDED_NORMAL_RH_DISTRIBUTION,.false.,.false.,-888888,-888888,BOUNDED_NORMAL_RH_DISTRIBUTION,.false.,.false.,-888888,-888888,BOUNDED_NORMAL_RHF,.false.,.false.,-888888,-888888 +QTY_TRACER_CONCENTRATION,.false.,.true.,-888888,0,BOUNDED_NORMAL_RH_DISTRIBUTION,.false.,.true.,-888888,0,BOUNDED_NORMAL_RH_DISTRIBUTION,.false.,.true.,-888888,0,BOUNDED_NORMAL_RH_DISTRIBUTION,.false.,.true.,-888888,0,BOUNDED_NORMAL_RHF,.false.,.true.,-888888,0 +QTY_TRACER_SOURCE,.false.,.true.,-888888,0,BOUNDED_NORMAL_RH_DISTRIBUTION,.false.,.true.,-888888,0,BOUNDED_NORMAL_RH_DISTRIBUTION,.false.,.true.,-888888,0,BOUNDED_NORMAL_RH_DISTRIBUTION,.false.,.true.,-888888,0,BOUNDED_NORMAL_RHF,.false.,.true.,-888888,0 diff --git a/models/lorenz_96_tracer_advection/work/state_eakf_tracer_bnrhf_qceff_table.csv b/models/lorenz_96_tracer_advection/work/state_eakf_tracer_bnrhf_qceff_table.csv new file mode 100644 index 0000000000..60c044e1fa --- /dev/null +++ b/models/lorenz_96_tracer_advection/work/state_eakf_tracer_bnrhf_qceff_table.csv @@ -0,0 +1,5 @@ +QCEFF table version: 1,obs_error_info,,,,probit_inflation,,,,,probit_state,,,,,probit_extended_state,,,,,obs_inc_info,,,, +QTY_NAME:,bounded_below,bounded_above,lower_bound,upper_bound,dist_type,bounded_below,bounded_above,lower_bound,upper_bound,dist_type,bounded_below,bounded_above,lower_bound,upper_bound,dist_type,bounded_below,bounded_above,lower_bound,upper_bound,filter_kind,bounded_below,bounded_above,lower_bound,upper_bound +QTY_STATE_VARIABLE,.false.,.false.,-888888,-888888,NORMAL_DISTRIBUTION,.false.,.false.,-888888,-888888,NORMAL_DISTRIBUTION,.false.,.false.,-888888,-888888,NORMAL_DISTRIBUTION,.false.,.false.,-888888,-888888,EAKF,.false.,.false.,-888888,-888888 +QTY_TRACER_CONCENTRATION,.true.,.false.,0,-888888,BOUNDED_NORMAL_RH_DISTRIBUTION,.true.,.false.,0,-888888,BOUNDED_NORMAL_RH_DISTRIBUTION,.true.,.false.,0,-888888,BOUNDED_NORMAL_RH_DISTRIBUTION,.true.,.false.,0,-888888,BOUNDED_NORMAL_RHF,.true.,.false.,0,-888888 +QTY_TRACER_SOURCE,.true.,.false.,0,-888888,BOUNDED_NORMAL_RH_DISTRIBUTION,.true.,.false.,0,-888888,BOUNDED_NORMAL_RH_DISTRIBUTION,.true.,.false.,0,-888888,BOUNDED_NORMAL_RH_DISTRIBUTION,.true.,.false.,0,-888888,BOUNDED_NORMAL_RHF,.true.,.false.,0,-888888 diff --git a/models/mpas_atm/data/input.nml b/models/mpas_atm/data/input.nml index eddd5edd72..7bd387b6c0 100644 --- a/models/mpas_atm/data/input.nml +++ b/models/mpas_atm/data/input.nml @@ -1,3 +1,10 @@ +&probit_transform_nml + / + +&algorithm_info_nml + qceff_table_filename = '' + / + &perfect_model_obs_nml read_input_state_from_file = .true. single_file_in = .false. diff --git a/models/mpas_atm/work/input.nml b/models/mpas_atm/work/input.nml index 67869b6174..10a2171162 100644 --- a/models/mpas_atm/work/input.nml +++ b/models/mpas_atm/work/input.nml @@ -1,3 +1,10 @@ +&probit_transform_nml + / + +&algorithm_info_nml + qceff_table_filename = '' + / + &perfect_model_obs_nml read_input_state_from_file = .true. single_file_in = .false. @@ -92,7 +99,6 @@ / &assim_tools_nml - filter_kind = 1 cutoff = 0.10 distribute_mean = .false. convert_all_obs_verticals_first = .true. diff --git a/models/mpas_ocn/work/input.nml b/models/mpas_ocn/work/input.nml index a3b35c0cfb..1aa03fe07f 100644 --- a/models/mpas_ocn/work/input.nml +++ b/models/mpas_ocn/work/input.nml @@ -1,3 +1,10 @@ +&probit_transform_nml + / + +&algorithm_info_nml + qceff_table_filename = '' + / + &perfect_model_obs_nml start_from_restart = .true., output_restart = .true., @@ -74,7 +81,6 @@ / &assim_tools_nml - filter_kind = 1, cutoff = 1000000.0, sort_obs_inc = .false., spread_restoration = .false., diff --git a/models/noah/work/input.nml b/models/noah/work/input.nml index 86f921a95a..0f52583682 100644 --- a/models/noah/work/input.nml +++ b/models/noah/work/input.nml @@ -1,5 +1,12 @@ # This namelist is for both NOAH and NOAH-MP +&probit_transform_nml + / + +&algorithm_info_nml + qceff_table_filename = '' + / + &model_nml lsm_model_choice = 'noahMP_36' domain_shapefiles = 'RESTART.2003051600_DOMAIN1_01' @@ -115,7 +122,6 @@ # cutoff = 0.06 (radians) is about 400 km &assim_tools_nml - filter_kind = 1 cutoff = 0.05 sort_obs_inc = .false. spread_restoration = .false. diff --git a/models/null_model/work/input.nml b/models/null_model/work/input.nml index c3e8905428..cbb0e06326 100644 --- a/models/null_model/work/input.nml +++ b/models/null_model/work/input.nml @@ -1,3 +1,10 @@ +&probit_transform_nml + / + +&algorithm_info_nml + qceff_table_filename = '' + / + &perfect_model_obs_nml read_input_state_from_file = .true., single_file_in = .true. @@ -97,7 +104,6 @@ / &assim_tools_nml - filter_kind = 1, cutoff = 1000000.0, sort_obs_inc = .false., spread_restoration = .false., diff --git a/models/pe2lyr/work/input.nml b/models/pe2lyr/work/input.nml index ddfc37e57b..e25edd4337 100644 --- a/models/pe2lyr/work/input.nml +++ b/models/pe2lyr/work/input.nml @@ -1,3 +1,10 @@ +&probit_transform_nml + / + +&algorithm_info_nml + qceff_table_filename = '' + / + &perfect_model_obs_nml start_from_restart = .true., output_restart = .true., @@ -70,7 +77,6 @@ perturbation_amplitude = 0.2 / &assim_tools_nml - filter_kind = 1, cutoff = 0.02, sort_obs_inc = .false., spread_restoration = .false., diff --git a/models/rose/work/input.nml b/models/rose/work/input.nml index 786486552b..8f52f2fb5e 100644 --- a/models/rose/work/input.nml +++ b/models/rose/work/input.nml @@ -1,3 +1,10 @@ +&probit_transform_nml + / + +&algorithm_info_nml + qceff_table_filename = '' + / + &perfect_model_obs_nml start_from_restart = .false., output_restart = .true., @@ -73,7 +80,6 @@ / &assim_tools_nml - filter_kind = 1, cutoff = 0.2, sort_obs_inc = .false., spread_restoration = .false., diff --git a/models/simple_advection/work/input.nml b/models/simple_advection/work/input.nml index 13f84b1220..cd436c8f4a 100644 --- a/models/simple_advection/work/input.nml +++ b/models/simple_advection/work/input.nml @@ -1,3 +1,10 @@ +&probit_transform_nml + / + +&algorithm_info_nml + qceff_table_filename = '' + / + &perfect_model_obs_nml read_input_state_from_file = .true. single_file_in = .true. @@ -91,7 +98,6 @@ # large for the model to converge. to test that the model is # doing a successful assimilation, change cutoff to 0.02 and rerun. &assim_tools_nml - filter_kind = 1 cutoff = 100000000.0 sort_obs_inc = .false. spread_restoration = .false. diff --git a/models/sqg/work/input.nml b/models/sqg/work/input.nml index 62b0acaee3..5981b6ca67 100644 --- a/models/sqg/work/input.nml +++ b/models/sqg/work/input.nml @@ -1,3 +1,10 @@ +&probit_transform_nml + / + +&algorithm_info_nml + qceff_table_filename = '' + / + &perfect_model_obs_nml start_from_restart = .true., output_restart = .true., @@ -79,7 +86,6 @@ / &assim_tools_nml - filter_kind = 1, cutoff = 100000.0, sort_obs_inc = .false., spread_restoration = .false., diff --git a/models/template/work/oned_input.nml b/models/template/work/oned_input.nml index 66b5a07bb8..65ee6dcb9d 100644 --- a/models/template/work/oned_input.nml +++ b/models/template/work/oned_input.nml @@ -1,3 +1,10 @@ +&probit_transform_nml + / + +&algorithm_info_nml + qceff_table_filename = '' + / + &perfect_model_obs_nml read_input_state_from_file = .true., single_file_in = .true. @@ -86,7 +93,6 @@ / &assim_tools_nml - filter_kind = 1, cutoff = 0.02, sort_obs_inc = .false., spread_restoration = .false., diff --git a/models/template/work/threed_input.nml b/models/template/work/threed_input.nml index 90608b10f6..8b50853c78 100644 --- a/models/template/work/threed_input.nml +++ b/models/template/work/threed_input.nml @@ -1,3 +1,10 @@ +&probit_transform_nml + / + +&algorithm_info_nml + qceff_table_filename = '' + / + &perfect_model_obs_nml read_input_state_from_file = .true., single_file_in = .true. @@ -86,7 +93,6 @@ / &assim_tools_nml - filter_kind = 1, cutoff = 1000000.0 sort_obs_inc = .false., spread_restoration = .false., diff --git a/models/tiegcm/work/input.nml b/models/tiegcm/work/input.nml index c92d64e746..7420562fa9 100644 --- a/models/tiegcm/work/input.nml +++ b/models/tiegcm/work/input.nml @@ -1,3 +1,10 @@ +&probit_transform_nml + / + +&algorithm_info_nml + qceff_table_filename = '' + / + &quality_control_nml / @@ -104,7 +111,6 @@ / &assim_tools_nml - filter_kind = 1 cutoff = 0.2 sort_obs_inc = .false. spread_restoration = .false. diff --git a/models/wrf/experiments/Radar/input.nml b/models/wrf/experiments/Radar/input.nml index a2d9428e61..255f3e1c75 100644 --- a/models/wrf/experiments/Radar/input.nml +++ b/models/wrf/experiments/Radar/input.nml @@ -1,3 +1,10 @@ +&probit_transform_nml + / + +&algorithm_info_nml + qceff_table_filename = '' + / + &filter_nml async = 2, adv_ens_command = "./advance_model.csh", diff --git a/models/wrf/regression/CONUS-V2/input.nml b/models/wrf/regression/CONUS-V2/input.nml index 5cff3020ae..fb1e7a8546 100644 --- a/models/wrf/regression/CONUS-V2/input.nml +++ b/models/wrf/regression/CONUS-V2/input.nml @@ -1,3 +1,10 @@ +&probit_transform_nml + / + +&algorithm_info_nml + qceff_table_filename = '' + / + &filter_nml async = 2, adv_ens_command = "./advance_model.csh", diff --git a/models/wrf/regression/Radar/input.nml b/models/wrf/regression/Radar/input.nml index 165fb5451a..a30b93786f 100644 --- a/models/wrf/regression/Radar/input.nml +++ b/models/wrf/regression/Radar/input.nml @@ -1,3 +1,10 @@ +&probit_transform_nml + / + +&algorithm_info_nml + qceff_table_filename = '' + / + &perfect_model_obs_nml start_from_restart = .true., output_restart = .true., diff --git a/models/wrf/work/input.nml b/models/wrf/work/input.nml index 30b6f0cad0..aec2653ea6 100644 --- a/models/wrf/work/input.nml +++ b/models/wrf/work/input.nml @@ -1,3 +1,10 @@ +&probit_transform_nml + / + +&algorithm_info_nml + qceff_table_filename = '' + / + &perfect_model_obs_nml read_input_state_from_file = .true. single_file_in = .false. @@ -96,7 +103,6 @@ # localization radius, where the influence of an obs decreases # to ~half at 300 km, and ~0 at the edges of the area. &assim_tools_nml - filter_kind = 1, cutoff = 0.05, sort_obs_inc = .false., spread_restoration = .false., diff --git a/models/wrf_hydro/work/input.nml b/models/wrf_hydro/work/input.nml index 8b22e40e68..7ab439a0ca 100644 --- a/models/wrf_hydro/work/input.nml +++ b/models/wrf_hydro/work/input.nml @@ -7,6 +7,13 @@ # domain_order = 'hydro', 'parameters' # domain_shapefiles = 'restart.hydro.nc', 'parameters.nc' +&probit_transform_nml + / + +&algorithm_info_nml + qceff_table_filename = '' + / + &model_nml assimilation_period_days = 0 assimilation_period_seconds = 3600 @@ -122,7 +129,6 @@ / -# filter_kind=9 => JPoterjoy particle filter # cutoff is in radians; for the earth, 0.05 is about 300 km. # cutoff is defined to be the half-width of the localization radius # so 0.05 radians for cutoff is about an 600 km effective @@ -134,7 +140,6 @@ # max_link_distance = 2000m = cutoff = 0.000157 radians &assim_tools_nml - filter_kind = 1 cutoff = 0.000157 sort_obs_inc = .false. spread_restoration = .false. diff --git a/observations/forward_operators/obs_def_1d_state_mod.f90 b/observations/forward_operators/obs_def_1d_state_mod.f90 index 5d8b154040..132c4071e9 100644 --- a/observations/forward_operators/obs_def_1d_state_mod.f90 +++ b/observations/forward_operators/obs_def_1d_state_mod.f90 @@ -83,7 +83,7 @@ module obs_def_1d_state_mod ! Storage for the power forward operator integer :: num_power_obs = 0 ! current count of obs -integer :: max_power_obs = 100000 ! allocation size limit +integer :: max_power_obs = 220000 ! allocation size limit real(r8), allocatable :: power(:) ! metadata storage @@ -465,6 +465,9 @@ subroutine get_expected_power(state_handle, ens_size, location, powkey, val, ist real(r8), intent(out) :: val(ens_size) integer, intent(out) :: istatus(ens_size) +integer :: i +real(r8) :: pval(ens_size) + ! The forward operator interface for this type of observation. It is ! called with a state vector, a location, and a key to identify which ! observation is being processed. The return 'val' is the expected @@ -479,10 +482,21 @@ subroutine get_expected_power(state_handle, ens_size, location, powkey, val, ist call check_valid_key_power(powkey, 'GIVEN', 'get_expected_power') ! Interpolate the raw state to the location for each ensemble member -call interpolate(state_handle, ens_size, location, QTY_STATE_VARIABLE, val, istatus) +call interpolate(state_handle, ens_size, location, QTY_STATE_VARIABLE, pval, istatus) -! Raise the interpolated state values to the power for this observation -val = val ** power(powkey) +if(power(powkey) == int(power(powkey))) then + ! Integer power, just use standard definition + val = pval ** power(powkey) +else + ! For non-integer powers, fix up values for negative bases + do i = 1, ens_size + if(pval(i) >= 0.0_r8) then + val(i) = pval(i) ** power(powkey) + else + val(i) = -1.0_r8 * (-1.0_r8 * pval(i)) ** power(powkey) + endif + end do +endif if(debug) print*, 'get_expected_power key is ', powkey if(debug) print*, 'metadata value is: ', power(powkey)