diff --git a/.gitignore b/.gitignore index c0823ccc76..b580da07fc 100644 --- a/.gitignore +++ b/.gitignore @@ -207,6 +207,7 @@ test_normal_dist test_beta_dist test_kde_dist test_window +test_force_bounds # Directories to NOT IGNORE ... same as executable names # as far as I know, these must be listed after the executables diff --git a/CHANGELOG.rst b/CHANGELOG.rst index fe81722453..66d9cf9e63 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -22,6 +22,21 @@ individual files. The changes are now listed with the most recent at the top. +**January 22 2025 :: Bug-fix: Gamma and Beta Distributions. Tag v11.8.9** + +Bug fixes: + + - Beta distribution only supporting standard Beta, bounded 0-1. + - Gamma distribution only supporting standard, lower bound 0. + - Beta and Gamma bounds are forced in the QCEFF table. + +Updates: + + - Explicitly setting distribution type, now have UNSET. + - Message about failing to converge changed to E_ALLMSG to be visible + on all mpi ranks. + - remove unused test_obs directory + **January 14 2025 :: Bug-fix MOM6 potential temperature. Tag v11.8.8** - MOM6 model_interpolate for potential temperature diff --git a/assimilation_code/modules/assimilation/algorithm_info_mod.f90 b/assimilation_code/modules/assimilation/algorithm_info_mod.f90 index 41887d4cad..e7c4a56ddd 100644 --- a/assimilation_code/modules/assimilation/algorithm_info_mod.f90 +++ b/assimilation_code/modules/assimilation/algorithm_info_mod.f90 @@ -217,9 +217,19 @@ subroutine read_qceff_table(qceff_table_filename) case ('BOUNDED_NORMAL_RH_DISTRIBUTION') qceff_table_data(row)%probit_inflation%dist_type = BOUNDED_NORMAL_RH_DISTRIBUTION case ('GAMMA_DISTRIBUTION') + ! Force standard Gamma distribution qceff_table_data(row)%probit_inflation%dist_type = GAMMA_DISTRIBUTION + qceff_table_data(row)%probit_inflation%bounded_above = .false. + qceff_table_data(row)%probit_inflation%bounded_below = .true. + qceff_table_data(row)%probit_inflation%upper_bound = MISSING_R8 + qceff_table_data(row)%probit_inflation%lower_bound = 0.0_r8 case ('BETA_DISTRIBUTION') + ! Force standard Beta distribution qceff_table_data(row)%probit_inflation%dist_type = BETA_DISTRIBUTION + qceff_table_data(row)%probit_inflation%bounded_above = .true. + qceff_table_data(row)%probit_inflation%bounded_below = .true. + qceff_table_data(row)%probit_inflation%upper_bound = 1.0_r8 + qceff_table_data(row)%probit_inflation%lower_bound = 0.0_r8 case ('LOG_NORMAL_DISTRIBUTION') qceff_table_data(row)%probit_inflation%dist_type = LOG_NORMAL_DISTRIBUTION case ('UNIFORM_DISTRIBUTION') @@ -242,9 +252,19 @@ subroutine read_qceff_table(qceff_table_filename) case ('BOUNDED_NORMAL_RH_DISTRIBUTION') qceff_table_data(row)%probit_state%dist_type = BOUNDED_NORMAL_RH_DISTRIBUTION case ('GAMMA_DISTRIBUTION') + ! Force standard Gamma distribution qceff_table_data(row)%probit_state%dist_type = GAMMA_DISTRIBUTION + qceff_table_data(row)%probit_state%bounded_above = .false. + qceff_table_data(row)%probit_state%bounded_below = .true. + qceff_table_data(row)%probit_state%upper_bound = MISSING_R8 + qceff_table_data(row)%probit_state%lower_bound = 0.0_r8 case ('BETA_DISTRIBUTION') + ! Force standard Beta distribution qceff_table_data(row)%probit_state%dist_type = BETA_DISTRIBUTION + qceff_table_data(row)%probit_state%bounded_above = .true. + qceff_table_data(row)%probit_state%bounded_below = .true. + qceff_table_data(row)%probit_state%upper_bound = 1.0_r8 + qceff_table_data(row)%probit_state%lower_bound = 0.0_r8 case ('LOG_NORMAL_DISTRIBUTION') qceff_table_data(row)%probit_state%dist_type = LOG_NORMAL_DISTRIBUTION case ('UNIFORM_DISTRIBUTION') @@ -266,9 +286,19 @@ subroutine read_qceff_table(qceff_table_filename) case ('BOUNDED_NORMAL_RH_DISTRIBUTION') qceff_table_data(row)%probit_extended_state%dist_type = BOUNDED_NORMAL_RH_DISTRIBUTION case ('GAMMA_DISTRIBUTION') + ! Force standard Gamma distribution qceff_table_data(row)%probit_extended_state%dist_type = GAMMA_DISTRIBUTION + qceff_table_data(row)%probit_extended_state%bounded_above = .false. + qceff_table_data(row)%probit_extended_state%bounded_below = .true. + qceff_table_data(row)%probit_extended_state%upper_bound = MISSING_R8 + qceff_table_data(row)%probit_extended_state%lower_bound = 0.0_r8 case ('BETA_DISTRIBUTION') + ! Force standard Beta distribution qceff_table_data(row)%probit_extended_state%dist_type = BETA_DISTRIBUTION + qceff_table_data(row)%probit_extended_state%bounded_above = .true. + qceff_table_data(row)%probit_extended_state%bounded_below = .true. + qceff_table_data(row)%probit_extended_state%upper_bound = 1.0_r8 + qceff_table_data(row)%probit_extended_state%lower_bound = 0.0_r8 case ('LOG_NORMAL_DISTRIBUTION') qceff_table_data(row)%probit_extended_state%dist_type = LOG_NORMAL_DISTRIBUTION case ('UNIFORM_DISTRIBUTION') diff --git a/assimilation_code/modules/assimilation/assim_tools_mod.f90 b/assimilation_code/modules/assimilation/assim_tools_mod.f90 index 5787c0bebd..cfff61e275 100644 --- a/assimilation_code/modules/assimilation/assim_tools_mod.f90 +++ b/assimilation_code/modules/assimilation/assim_tools_mod.f90 @@ -1065,7 +1065,7 @@ subroutine obs_increment_gamma(ens, ens_size, prior_mean, prior_var, obs, obs_va ! Compute the prior quantiles of each ensemble member in the prior gamma distribution call gamma_mn_var_to_shape_scale(prior_mean, prior_var, prior_shape, prior_scale) do i = 1, ens_size - q(i) = gamma_cdf(ens(i), prior_shape, prior_scale, .true., .false., 0.0_r8, missing_r8) + q(i) = gamma_cdf(ens(i), prior_shape, prior_scale) end do ! Compute the statistics of the continous posterior distribution @@ -1082,7 +1082,7 @@ subroutine obs_increment_gamma(ens, ens_size, prior_mean, prior_var, obs, obs_va ! Now invert the quantiles with the posterior distribution do i = 1, ens_size - post(i) = inv_gamma_cdf(q(i), post_shape, post_scale, .true., .false., 0.0_r8, missing_r8) + post(i) = inv_gamma_cdf(q(i), post_shape, post_scale) end do obs_inc = post - ens diff --git a/assimilation_code/modules/assimilation/beta_distribution_mod.f90 b/assimilation_code/modules/assimilation/beta_distribution_mod.f90 index f5c3c5fc37..67eb242759 100644 --- a/assimilation_code/modules/assimilation/beta_distribution_mod.f90 +++ b/assimilation_code/modules/assimilation/beta_distribution_mod.f90 @@ -4,6 +4,9 @@ ! Thanks to Chris Riedel who developed the methods in this module. +! This module only supports standard beta distributions for which the +! lower bound is 0 and the upper bound is 1. + module beta_distribution_mod use types_mod, only : r8, PI, missing_r8 @@ -12,7 +15,7 @@ module beta_distribution_mod use random_seq_mod, only : random_seq_type, random_uniform -use distribution_params_mod, only : distribution_params_type +use distribution_params_mod, only : distribution_params_type, BETA_DISTRIBUTION use normal_distribution_mod, only : inv_cdf @@ -36,11 +39,11 @@ subroutine test_beta ! This routine provides limited tests of the numerics in this module. It begins ! by comparing a handful of cases of the pdf and cdf to results from Matlab. It -! then tests the quality of the inverse cdf for a single shape/scale pair. Failing +! then tests the quality of the inverse cdf for a single alpha/beta pair. Failing ! these tests suggests a serious problem. Passing them does not indicate that ! there are acceptable results for all possible inputs. -real(r8) :: x, y, p, inv +real(r8) :: x, y, inv real(r8) :: alpha, beta, max_diff integer :: i @@ -61,17 +64,16 @@ subroutine test_beta ! Compare to matlab write(*, *) 'Absolute value of differences should be less than 1e-15' do i = 1, 7 - pdf_diff(i) = beta_pdf(mx(i), malpha(i), mbeta(i)) - mpdf(i) - cdf_diff(i) = beta_cdf(mx(i), malpha(i), mbeta(i), 0.0_r8, 1.0_r8) - mcdf(i) + pdf_diff(i) = abs(beta_pdf(mx(i), malpha(i), mbeta(i)) - mpdf(i)) + cdf_diff(i) = abs(beta_cdf(mx(i), malpha(i), mbeta(i)) - mcdf(i)) write(*, *) i, pdf_diff(i), cdf_diff(i) end do -if(abs(maxval(pdf_diff)) < 1e-15_r8 .and. abs(maxval(cdf_diff)) < 1e-15_r8) then +if(maxval(pdf_diff) < 1e-15_r8 .and. maxval(cdf_diff) < 1e-15_r8) then write(*, *) 'Matlab Comparison Tests: PASS' else write(*, *) 'Matlab Comparison Tests: FAIL' endif - ! Test many x values for cdf and inverse cdf for a single set of alpha and beta alpha = 5.0_r8 beta = 2.0_r8 @@ -79,23 +81,20 @@ subroutine test_beta max_diff = -1.0_r8 do i = 0, 1000 x = i / 1000.0_r8 - p = beta_pdf(x, alpha, beta) - y = beta_cdf(x, alpha, beta, 0.0_r8, 1.0_r8) - inv = inv_beta_cdf(y, alpha, beta, 0.0_r8, 1.0_r8) + y = beta_cdf(x, alpha, beta) + inv = inv_beta_cdf(y, alpha, beta) max_diff = max(abs(x - inv), max_diff) end do write(*, *) '----------------------------' write(*, *) 'max difference in inversion is ', max_diff write(*, *) 'max difference should be less than 1e-14' - if(max_diff < 1e-14_r8) then write(*, *) 'Inversion Tests: PASS' else write(*, *) 'Inversion Tests: FAIL' endif - end subroutine test_beta !----------------------------------------------------------------------- @@ -106,20 +105,25 @@ function inv_beta_cdf_params(quantile, p) result(x) real(r8), intent(in) :: quantile type(distribution_params_type), intent(in) :: p +! Only standard beta is currently supported. Fail if bounds are not 0 and 1 +if(p%lower_bound /= 0.0_r8 .or. p%upper_bound /= 1.0_r8) then + errstring = 'Only standard beta distribution with bounds of 0 and 1 is supported' + call error_handler(E_ERR, 'inv_beta_cdf_params', errstring, source) +endif + x = inv_cdf(quantile, beta_cdf_params, inv_beta_first_guess_params, p) end function inv_beta_cdf_params !----------------------------------------------------------------------- -function inv_beta_cdf(quantile, alpha, beta, lower_bound, upper_bound) result(x) +function inv_beta_cdf(quantile, alpha, beta) result(x) real(r8) :: x real(r8), intent(in) :: quantile real(r8), intent(in) :: alpha, beta -real(r8), intent(in) :: lower_bound, upper_bound -! Given a quantile, finds the value of x for which the scaled beta cdf +! Given a quantile, finds the value of x for which the beta cdf ! with alpha and beta has approximately this quantile type(distribution_params_type) :: p @@ -129,15 +133,13 @@ function inv_beta_cdf(quantile, alpha, beta, lower_bound, upper_bound) result(x) call error_handler(E_ERR, 'inv_beta_cdf', errstring, source) endif +! Load the p type for the generic cdf calls p%params(1) = alpha; p%params(2) = beta -! Beta must be bounded on both sides -p%lower_bound = lower_bound; p%upper_bound = upper_bound +p%bounded_below = .true.; p%bounded_above = .true. +p%lower_bound = 0.0_r8; p%upper_bound = 1.0_r8 x = inv_beta_cdf_params(quantile, p) -! Undo the scaling -x = x * (upper_bound - lower_bound) + lower_bound - end function inv_beta_cdf !--------------------------------------------------------------------------- @@ -184,25 +186,34 @@ function beta_cdf_params(x, p) real(r8), intent(in) :: x type(distribution_params_type), intent(in) :: p +! A translation routine that is required to use the generic cdf optimization routine +! Extracts the appropriate information from the distribution_params_type that is needed +! for a call to the function beta_cdf below. + real(r8) :: alpha, beta +! Only standard beta is currently supported. Fail if bounds are not 0 and 1 +if(p%lower_bound /= 0.0_r8 .or. p%upper_bound /= 1.0_r8) then + errstring = 'Only standard beta distribution with bounds of 0 and 1 is supported' + call error_handler(E_ERR, 'beta_cdf_params', errstring, source) +endif + alpha = p%params(1); beta = p%params(2) -beta_cdf_params = beta_cdf(x, alpha, beta, p%lower_bound, p%upper_bound) +beta_cdf_params = beta_cdf(x, alpha, beta) end function beta_cdf_params !--------------------------------------------------------------------------- -function beta_cdf(x, alpha, beta, lower_bound, upper_bound) +function beta_cdf(x, alpha, beta) ! Returns the cumulative distribution of a beta function with alpha and beta ! at the value x -! Returns a large negative value if called with illegal values +! Returns a failed_value if called with illegal values real(r8) :: beta_cdf real(r8), intent(in) :: x, alpha, beta -real(r8), intent(in) :: lower_bound, upper_bound ! Parameters must be positive if(alpha <= 0.0_r8 .or. beta <= 0.0_r8) then @@ -251,7 +262,7 @@ function random_beta(r, alpha, beta) ! Draw from U(0, 1) to get a quantile quantile = random_uniform(r) ! Invert cdf to get a draw from beta -random_beta = inv_beta_cdf(quantile, alpha, beta, 0.0_r8, 1.0_r8) +random_beta = inv_beta_cdf(quantile, alpha, beta) end function random_beta @@ -339,24 +350,25 @@ function inv_beta_first_guess_params(quantile, p) real(r8), intent(in) :: quantile type(distribution_params_type), intent(in) :: p +! A translation routine that is required to use the generic first_guess for +! the cdf optimization routine. +! Extracts the appropriate information from the distribution_params_type that is needed +! for a call to the function inv_beta_first_guess below. + real(r8) :: alpha, beta alpha = p%params(1); beta = p%params(2) -inv_beta_first_guess_params = inv_beta_first_guess(quantile, alpha, beta, & - p%bounded_below, p%bounded_above, p%lower_bound, p%upper_bound) +inv_beta_first_guess_params = inv_beta_first_guess(quantile, alpha, beta) end function inv_beta_first_guess_params !--------------------------------------------------------------------------- -function inv_beta_first_guess(x, alpha, beta, & - bounded_below, bounded_above, lower_bound, upper_bound) +function inv_beta_first_guess(x, alpha, beta) real(r8) :: inv_beta_first_guess real(r8), intent(in) :: x real(r8), intent(in) :: alpha, beta -logical, intent(in) :: bounded_below, bounded_above -real(r8), intent(in) :: lower_bound, upper_bound ! Need some sort of first guess, should be smarter here ! For starters, take the mean for this alpha and beta @@ -389,19 +401,22 @@ end subroutine beta_alpha_beta !--------------------------------------------------------------------------- -subroutine set_beta_params_from_ens(ens, num, lower_bound, upper_bound, p) +subroutine set_beta_params_from_ens(ens, num, p) -integer, intent(in) :: num -real(r8), intent(in) :: ens(num) -real(r8), intent(in) :: lower_bound, upper_bound -type(distribution_params_type), intent(inout) :: p +integer, intent(in) :: num +real(r8), intent(in) :: ens(num) +type(distribution_params_type), intent(out) :: p real(r8) :: alpha, beta +! Set up the description of the beta distribution defined by the ensemble +p%distribution_type = BETA_DISTRIBUTION + ! Set the bounds info -p%lower_bound = lower_bound; p%upper_bound = upper_bound +p%bounded_below = .true.; p%bounded_above = .true. +p%lower_bound = 0.0_r8; p%upper_bound = 1.0_r8 -! Get alpha and beta for the scaled ensemble +! Get alpha and beta for the ensemble call beta_alpha_beta(ens, num, alpha, beta) p%params(1) = alpha p%params(2) = beta diff --git a/assimilation_code/modules/assimilation/distribution_params_mod.f90 b/assimilation_code/modules/assimilation/distribution_params_mod.f90 index 0507810780..3a6575b129 100644 --- a/assimilation_code/modules/assimilation/distribution_params_mod.f90 +++ b/assimilation_code/modules/assimilation/distribution_params_mod.f90 @@ -7,17 +7,8 @@ module distribution_params_mod implicit none private -type distribution_params_type - integer :: distribution_type - logical :: bounded_below, bounded_above - real(r8) :: lower_bound, upper_bound - real(r8) :: params(2) - integer :: ens_size - real(r8), allocatable :: ens(:) - real(r8), allocatable :: more_params(:) -end type - ! Defining parameter strings for different prior distributions that can be used for probit transform +integer, parameter :: UNSET = -1 integer, parameter :: NORMAL_DISTRIBUTION = 1 integer, parameter :: BOUNDED_NORMAL_RH_DISTRIBUTION = 2 integer, parameter :: GAMMA_DISTRIBUTION = 3 @@ -27,6 +18,16 @@ module distribution_params_mod integer, parameter :: PARTICLE_FILTER_DISTRIBUTION = 7 integer, parameter :: KDE_DISTRIBUTION = 8 +type distribution_params_type + integer :: distribution_type = UNSET + logical :: bounded_below, bounded_above + real(r8) :: lower_bound, upper_bound + real(r8) :: params(2) + integer :: ens_size + real(r8), allocatable :: ens(:) + real(r8), allocatable :: more_params(:) +end type + public :: distribution_params_type, deallocate_distribution_params, & NORMAL_DISTRIBUTION, BOUNDED_NORMAL_RH_DISTRIBUTION, GAMMA_DISTRIBUTION, BETA_DISTRIBUTION, & LOG_NORMAL_DISTRIBUTION, UNIFORM_DISTRIBUTION, PARTICLE_FILTER_DISTRIBUTION, & diff --git a/assimilation_code/modules/assimilation/gamma_distribution_mod.f90 b/assimilation_code/modules/assimilation/gamma_distribution_mod.f90 index 5ffb66d2d8..ca4129cec1 100644 --- a/assimilation_code/modules/assimilation/gamma_distribution_mod.f90 +++ b/assimilation_code/modules/assimilation/gamma_distribution_mod.f90 @@ -2,6 +2,9 @@ ! by UCAR, "as is", without charge, subject to all terms of use at ! http://www.image.ucar.edu/DAReS/DART/DART_download +! This module only supports standard gamma distributions for which the +! lower bound is 0. + module gamma_distribution_mod use types_mod, only : r8, PI, missing_r8 @@ -10,7 +13,7 @@ module gamma_distribution_mod use normal_distribution_mod, only : normal_cdf, inv_cdf -use distribution_params_mod, only : distribution_params_type +use distribution_params_mod, only : distribution_params_type, GAMMA_DISTRIBUTION use random_seq_mod, only : random_seq_type, random_uniform @@ -60,18 +63,17 @@ subroutine test_gamma ! Compare to matlab write(*, *) 'Absolute value of differences should be less than 1e-15' do i = 1, 7 - pdf_diff(i) = gamma_pdf(mx(i), mshape(i), mscale(i)) - mpdf(i) - cdf_diff(i) = gamma_cdf(mx(i), mshape(i), mscale(i), .true., .false., 0.0_r8, missing_r8) - mcdf(i) + pdf_diff(i) = abs(gamma_pdf(mx(i), mshape(i), mscale(i)) - mpdf(i)) + cdf_diff(i) = abs(gamma_cdf(mx(i), mshape(i), mscale(i)) - mcdf(i)) write(*, *) i, pdf_diff(i), cdf_diff(i) end do - -if(abs(maxval(pdf_diff)) < 1e-15_r8 .and. abs(maxval(cdf_diff)) < 1e-15_r8) then +if(maxval(pdf_diff) < 1e-15_r8 .and. maxval(cdf_diff) < 1e-15_r8) then write(*, *) 'Matlab Comparison Tests: PASS' else write(*, *) 'Matlab Compariosn Tests: FAIL' endif -! Input a mean and variance +! Test many x values for cdf and inverse cdf for a single mean and sd (shape and scale) mean = 10.0_r8 sd = 1.0_r8 variance = sd**2 @@ -84,22 +86,20 @@ subroutine test_gamma max_diff = -1.0_r8 do i = 0, 1000 x = mean + ((i - 500.0_r8) / 500.0_r8) * 5.0_r8 * sd - y = gamma_cdf(x, gamma_shape, gamma_scale, .true., .false., 0.0_r8, missing_r8) - inv = inv_gamma_cdf(y, gamma_shape, gamma_scale, .true., .false., 0.0_r8, missing_r8) + y = gamma_cdf(x, gamma_shape, gamma_scale) + inv = inv_gamma_cdf(y, gamma_shape, gamma_scale) max_diff = max(abs(x-inv), max_diff) end do write(*, *) '----------------------------' write(*, *) 'max difference in inversion is ', max_diff write(*, *) 'max difference should be less than 1e-11' - if(max_diff < 1e-11_r8) then write(*, *) 'Inversion Tests: PASS' else write(*, *) 'Inversion Tests: FAIL' endif - end subroutine test_gamma !----------------------------------------------------------------------- @@ -110,21 +110,23 @@ function inv_gamma_cdf_params(quantile, p) result(x) real(r8), intent(in) :: quantile type(distribution_params_type), intent(in) :: p -! Could do error checks for gamma_shape and gamma_scale values here +! Only standard gamma currently supported +if(p%lower_bound /= 0.0_r8 .and. p%upper_bound /= missing_r8) then + errstring = 'Only standard gamma distribution with lower bound of 0 is supported' + call error_handler(E_ERR, 'inv_gamma_cdf_params', errstring, source) +end if + x = inv_cdf(quantile, gamma_cdf_params, inv_gamma_first_guess_params, p) end function inv_gamma_cdf_params !----------------------------------------------------------------------- -function inv_gamma_cdf(quantile, gamma_shape, gamma_scale, & - bounded_below, bounded_above, lower_bound, upper_bound) result(x) +function inv_gamma_cdf(quantile, gamma_shape, gamma_scale) result(x) real(r8) :: x real(r8), intent(in) :: quantile real(r8), intent(in) :: gamma_shape real(r8), intent(in) :: gamma_scale -logical, intent(in) :: bounded_below, bounded_above -real(r8), intent(in) :: lower_bound, upper_bound ! Given a quantile q, finds the value of x for which the gamma cdf ! with shape and scale has approximately this quantile @@ -132,9 +134,9 @@ function inv_gamma_cdf(quantile, gamma_shape, gamma_scale, & type(distribution_params_type) :: p ! Load the p type for the generic cdf calls -p%params(1) = gamma_shape; p%params(2) = gamma_scale -p%bounded_below = bounded_below; p%bounded_above = bounded_above -p%lower_bound = lower_bound; p%upper_bound = upper_bound +p%params(1) = gamma_shape; p%params(2) = gamma_scale +p%bounded_below = .true.; p%bounded_above = .false. +p%lower_bound = 0.0_r8; p%upper_bound = missing_r8 x = inv_gamma_cdf_params(quantile, p) @@ -174,23 +176,28 @@ function gamma_cdf_params(x, p) real(r8) :: gamma_shape, gamma_scale +! Only standard gamma currently supported +if(p%lower_bound /= 0.0_r8 .and. p%upper_bound /= missing_r8) then + errstring = 'Only standard gamma distribution with lower bound of 0 is supported' + call error_handler(E_ERR, 'gamma_cdf_params', errstring, source) +end if + gamma_shape = p%params(1); gamma_scale = p%params(2) -gamma_cdf_params = gamma_cdf(x, gamma_shape, gamma_scale, & - p%bounded_below, p%bounded_above, p%lower_bound, p%upper_bound) +gamma_cdf_params = gamma_cdf(x, gamma_shape, gamma_scale) end function gamma_cdf_params !--------------------------------------------------------------------------- -function gamma_cdf(x, gamma_shape, gamma_scale, bounded_below, bounded_above, lower_bound, upper_bound) +function gamma_cdf(x, gamma_shape, gamma_scale) ! Returns the cumulative distribution of a gamma function with shape and scale ! at the value x +! Returns a failed_value if called with illegal values + real(r8) :: gamma_cdf real(r8), intent(in) :: x, gamma_shape, gamma_scale -logical, intent(in) :: bounded_below, bounded_above -real(r8), intent(in) :: lower_bound, upper_bound ! All inputs must be nonnegative if(x < 0.0_r8 .or. gamma_shape < 0.0_r8 .or. gamma_scale < 0.0_r8) then @@ -360,7 +367,7 @@ function random_gamma(r, rshape, rscale) ! Draw from U(0, 1) to get a quantile quantile = random_uniform(r) ! Invert cdf to get a draw from gamma -random_gamma = inv_gamma_cdf(quantile, rshape, rscale, .true., .false., 0.0_r8, missing_r8) +random_gamma = inv_gamma_cdf(quantile, rshape, rscale) end function random_gamma @@ -425,7 +432,7 @@ function inv_gamma_first_guess_params(quantile, p) ! A translation routine that is required to use the generic first_guess for ! the cdf optimization routine. ! Extracts the appropriate information from the distribution_params_type that is needed -! for a call to the function approx_inv_normal_cdf below (which is nothing). +! for a call to the function inv_gamma_first_guess below. real(r8) :: gamma_shape, gamma_scale @@ -446,26 +453,25 @@ function inv_gamma_first_guess(quantile, gamma_shape, gamma_scale) ! For starters, take the mean for this shape and scale inv_gamma_first_guess = gamma_shape * gamma_scale ! Could use info about sd to further refine mean and reduce iterations -!!!sd = sqrt(gamma_shape * gamma_scale**2) end function inv_gamma_first_guess !--------------------------------------------------------------------------- -subroutine set_gamma_params_from_ens(ens, num, bounded_below, bounded_above, & - lower_bound, upper_bound, p) +subroutine set_gamma_params_from_ens(ens, num, p) integer, intent(in) :: num real(r8), intent(in) :: ens(num) -logical, intent(in) :: bounded_below, bounded_above -real(r8), intent(in) :: lower_bound, upper_bound -type(distribution_params_type), intent(inout) :: p +type(distribution_params_type), intent(out) :: p real(r8) :: gamma_shape, gamma_scale +! Set up the description of the gamma distribution defined by the ensemble +p%distribution_type = GAMMA_DISTRIBUTION + ! Set the bounds info -p%bounded_below = bounded_below; p%bounded_above = bounded_above -p%lower_bound = lower_bound; p%upper_bound = upper_bound +p%bounded_below = .true.; p%bounded_above = .false. +p%lower_bound = 0.0_r8; p%upper_bound = missing_r8 ! Get shape and scale call gamma_shape_scale(ens, num, gamma_shape, gamma_scale) diff --git a/assimilation_code/modules/assimilation/normal_distribution_mod.f90 b/assimilation_code/modules/assimilation/normal_distribution_mod.f90 index b6b84a4baa..f77dea403d 100644 --- a/assimilation_code/modules/assimilation/normal_distribution_mod.f90 +++ b/assimilation_code/modules/assimilation/normal_distribution_mod.f90 @@ -6,7 +6,7 @@ module normal_distribution_mod use types_mod, only : r8, missing_r8, digits12, PI -use utilities_mod, only : E_ERR, E_MSG, error_handler +use utilities_mod, only : E_ERR, E_ALLMSG, error_handler use distribution_params_mod, only : distribution_params_type, NORMAL_DISTRIBUTION @@ -61,6 +61,7 @@ subroutine test_normal 1e-5_r8, 1e-4_r8, 1e-3_r8, 1e-2_r8, 1e-1_r8, 1e-0_r8] ! Compare to matlab +write(*, *) 'Absolute value of differences should be less than 1e-15' ! Absolute value of differences should be less than 1e-15 do i = 1, 7 cdf_diff(i) = normal_cdf(mx(i), mmean(i), msd(i)) - mcdf(i) @@ -157,6 +158,9 @@ function inv_weighted_normal_cdf(alpha, mean, sd, q) result(x) real(r8) :: normalized_q ! VARIABLES THROUGHOUT NEED TO SWITCH TO DIGITS_12 +! The comment above is not consistent with the performance of these routines +! as validated by test_normal. There is no evidence that this is still +! required. HK @todo test_normal fails if r8=r4, so beware. ! Can search in a standard normal, then multiply by sd at end and add mean ! Divide q by alpha to get the right place for weighted normal @@ -196,8 +200,6 @@ function approx_inv_normal_cdf(quantile_in) result(x) real(r8), intent(in) :: quantile_in ! This is used to get a good first guess for the search in inv_std_normal_cdf -! The params argument is not needed here but is required for consistency & -! with other distributions ! normal inverse ! translate from http://home.online.no/~pjacklam/notes/invnorm @@ -285,7 +287,6 @@ function inv_std_normal_cdf(quantile) result(x) ! Given a quantile q, finds the value of x for which the standard normal cdf ! has approximately this quantile -! Where should the stupid p type come from type(distribution_params_type) :: p real(r8) :: mean, sd @@ -293,6 +294,11 @@ function inv_std_normal_cdf(quantile) result(x) mean = 0.0_r8; sd = 1.0_r8 p%params(1) = mean; p%params(2) = sd +! Normal is unbounded +p%distribution_type = NORMAL_DISTRIBUTION +p%bounded_below = .false.; p%bounded_above = .false. +p%lower_bound = missing_r8; p%upper_bound = missing_r8 + x = inv_std_normal_cdf_params(quantile, p) end function inv_std_normal_cdf @@ -301,6 +307,10 @@ end function inv_std_normal_cdf function inv_cdf(quantile_in, cdf, first_guess, p) result(x) +! This routine is used for many distributions, not just the normal distribution +! Could be moved to its own module for clarity or combined with Ian Groom's +! rootfinding module as an option. + interface function cdf(x, p) use types_mod, only : r8 @@ -338,6 +348,11 @@ function first_guess(quantile, p) ! Limit on number of times to halve the increment; No deep thought. integer, parameter :: max_half_iterations = 25 +! Largest delta for computing centered difference derivative +! Changing this can affect accuracy for specific applications like Ian Grooms KDE +! Changing to 1e-9 allows all of the KDE tests to PASS +real(r8), parameter :: max_delta = 1e-8_r8 + real(r8) :: quantile real(r8) :: reltol, dq_dx, delta real(r8) :: x_guess, q_guess, x_new, q_new, del_x, del_q, del_q_old, q_old @@ -387,7 +402,7 @@ function first_guess(quantile, p) ! Analytically, the PDF is derivative of CDF but this can be numerically inaccurate for extreme values ! Use numerical derivatives of the CDF to get more accurate inversion ! These values for the delta for the approximation work with Gfortran - delta = max(1e-8_r8, 1e-8_r8 * abs(x_guess)) + delta = max(max_delta, max_delta * abs(x_guess)) dq_dx = (cdf(x_guess + delta, p) - cdf(x_guess - delta, p)) / (2.0_r8 * delta) ! Derivative of 0 means we're not going anywhere else if(dq_dx <= 0.0_r8) then @@ -431,7 +446,7 @@ function first_guess(quantile, p) ! Not currently happening for any of the test cases on gfortran x = x_new write(errstring, *) 'Failed to converge for quantile ', quantile -call error_handler(E_MSG, 'inv_cdf', errstring, source) +call error_handler(E_ALLMSG, 'inv_cdf', errstring, source) !!!call error_handler(E_ERR, 'inv_cdf', errstring, source) end function inv_cdf @@ -482,7 +497,7 @@ subroutine set_normal_params_from_ens(ens, num, p) integer, intent(in) :: num real(r8), intent(in) :: ens(num) -type(distribution_params_type), intent(inout) :: p +type(distribution_params_type), intent(out) :: p ! Set up the description of the normal distribution defined by the ensemble p%distribution_type = NORMAL_DISTRIBUTION @@ -490,6 +505,9 @@ subroutine set_normal_params_from_ens(ens, num, p) ! The two meaningful params are the mean and standard deviation call normal_mean_sd(ens, num, p%params(1), p%params(2)) +! Normal is unbounded +p%bounded_below = .false.; p%bounded_above = .false. +p%lower_bound = missing_r8; p%upper_bound = missing_r8 end subroutine set_normal_params_from_ens diff --git a/assimilation_code/modules/assimilation/probit_transform_mod.f90 b/assimilation_code/modules/assimilation/probit_transform_mod.f90 index c5645c15a1..6b3dfe1fd2 100644 --- a/assimilation_code/modules/assimilation/probit_transform_mod.f90 +++ b/assimilation_code/modules/assimilation/probit_transform_mod.f90 @@ -138,11 +138,9 @@ subroutine transform_to_probit(ens_size, state_ens_in, distribution_type, p, & elseif(p%distribution_type == UNIFORM_DISTRIBUTION) then call to_probit_uniform(ens_size, state_ens, p, probit_ens, use_input_p, lower_bound, upper_bound) elseif(p%distribution_type == GAMMA_DISTRIBUTION) then - call to_probit_gamma(ens_size, state_ens, p, probit_ens, use_input_p, & - bounded_below, bounded_above, lower_bound, upper_bound) + call to_probit_gamma(ens_size, state_ens, p, probit_ens, use_input_p) elseif(p%distribution_type == BETA_DISTRIBUTION) then - call to_probit_beta(ens_size, state_ens, p, probit_ens, use_input_p, & - lower_bound, upper_bound) + call to_probit_beta(ens_size, state_ens, p, probit_ens, use_input_p) elseif(p%distribution_type == BOUNDED_NORMAL_RH_DISTRIBUTION) then call to_probit_bounded_normal_rh(ens_size, state_ens, p, probit_ens, & use_input_p, bounded_below, bounded_above, lower_bound, upper_bound) @@ -253,16 +251,13 @@ end subroutine to_probit_uniform !------------------------------------------------------------------------ -subroutine to_probit_gamma(ens_size, state_ens, p, probit_ens, use_input_p, & - bounded_below, bounded_above, lower_bound, upper_bound) +subroutine to_probit_gamma(ens_size, state_ens, p, probit_ens, use_input_p) integer, intent(in) :: ens_size real(r8), intent(in) :: state_ens(ens_size) type(distribution_params_type), intent(inout) :: p real(r8), intent(out) :: probit_ens(ens_size) logical, intent(in) :: use_input_p -logical, intent(in) :: bounded_below, bounded_above -real(r8), intent(in) :: lower_bound, upper_bound ! Probit transform for gamma. real(r8) :: quantile @@ -272,15 +267,7 @@ subroutine to_probit_gamma(ens_size, state_ens, p, probit_ens, use_input_p, & ! Get the parameters for this distribution if not already available if(.not. use_input_p) then - - ! In full generality, gamma must be bounded either below or above - if(.not. (bounded_below .neqv. bounded_above)) then - errstring = 'Gamma distribution requires either bounded above or below to be true' - call error_handler(E_ERR, 'to_probit_gamma', errstring, source) - endif - - call set_gamma_params_from_ens(state_ens, ens_size, bounded_below, bounded_above, & - lower_bound, upper_bound, p) + call set_gamma_params_from_ens(state_ens, ens_size, p) endif do i = 1, ens_size @@ -294,15 +281,13 @@ end subroutine to_probit_gamma !------------------------------------------------------------------------ -subroutine to_probit_beta(ens_size, state_ens, p, probit_ens, use_input_p, & - lower_bound, upper_bound) +subroutine to_probit_beta(ens_size, state_ens, p, probit_ens, use_input_p) integer, intent(in) :: ens_size real(r8), intent(in) :: state_ens(ens_size) type(distribution_params_type), intent(inout) :: p real(r8), intent(out) :: probit_ens(ens_size) logical, intent(in) :: use_input_p -real(r8), intent(in) :: lower_bound, upper_bound ! Probit transform for beta. real(r8) :: quantile @@ -310,7 +295,7 @@ subroutine to_probit_beta(ens_size, state_ens, p, probit_ens, use_input_p, & ! Get the parameters for this distribution if not already available if(.not. use_input_p) then - call set_beta_params_from_ens(state_ens, ens_size, lower_bound, upper_bound, p) + call set_beta_params_from_ens(state_ens, ens_size, p) endif do i = 1, ens_size diff --git a/developer_tests/qceff/test_force_bounds.f90 b/developer_tests/qceff/test_force_bounds.f90 new file mode 100644 index 0000000000..5f9f81eac6 --- /dev/null +++ b/developer_tests/qceff/test_force_bounds.f90 @@ -0,0 +1,97 @@ +! DART software - Copyright UCAR. This open source software is provided +! by UCAR, "as is", without charge, subject to all terms of use at +! http://www.image.ucar.edu/DAReS/DART/DART_download + +program test_force_bounds + +use algorithm_info_mod, only : init_algorithm_info_mod, end_algorithm_info_mod, probit_dist_info +use distribution_params_mod, only : GAMMA_DISTRIBUTION, BETA_DISTRIBUTION, NORMAL_DISTRIBUTION +use utilities_mod, only : initialize_utilities, finalize_utilities +use obs_kind_mod, only : QTY_AQUIFER_WATER, QTY_AMMONIUM_SULPHATE +use types_mod, only : r8, MISSING_R8 + +use test + +implicit none + +logical :: is_state, is_inflation +logical :: bounded_below, bounded_above +real(r8) :: lower_bound, upper_bound +integer :: dist_type + + +call initialize_utilities('test_table_read') + +call init_algorithm_info_mod() + +! QTY1 +! inflation GAMMA +call probit_dist_info(QTY_AQUIFER_WATER, .false., .true., dist_type, & + bounded_below, bounded_above, lower_bound, upper_bound) + +call plan(30) + +call ok(dist_type == GAMMA_DISTRIBUTION) +call ok(bounded_below) +call ok(.not. bounded_above) +call ok(lower_bound == 0.0_r8) +call ok(upper_bound == MISSING_R8) + +! state BETA +call probit_dist_info(QTY_AQUIFER_WATER, .true., .false., dist_type, & + bounded_below, bounded_above, lower_bound, upper_bound) + +call ok(dist_type == BETA_DISTRIBUTION) +call ok(bounded_below) +call ok(bounded_above) +call ok(lower_bound == 0.0_r8) +call ok(upper_bound == 1.0_r8) + +! extended state NORMAL +call probit_dist_info(QTY_AQUIFER_WATER, .false., .false., dist_type, & + bounded_below, bounded_above, lower_bound, upper_bound) + +call ok(dist_type == NORMAL_DISTRIBUTION) +call ok(.not. bounded_below) +call ok(.not. bounded_above) +call ok(lower_bound == MISSING_R8) +call ok(upper_bound == MISSING_R8) + + +! QTY2 +! inflation BETA +call probit_dist_info(QTY_AMMONIUM_SULPHATE , .false., .true., dist_type, & + bounded_below, bounded_above, lower_bound, upper_bound) + +call ok(dist_type == BETA_DISTRIBUTION) +call ok(bounded_below) +call ok(bounded_above) +call ok(lower_bound == 0.0_r8) +call ok(upper_bound == 1.0_r8) + +! state GAMMA +call probit_dist_info(QTY_AMMONIUM_SULPHATE , .true., .false., dist_type, & + bounded_below, bounded_above, lower_bound, upper_bound) + +call ok(dist_type == GAMMA_DISTRIBUTION) +call ok(bounded_below) +call ok(.not. bounded_above) +call ok(lower_bound == 0.0_r8) +call ok(upper_bound == MISSING_R8) + +! extended state BETA +call probit_dist_info(QTY_AMMONIUM_SULPHATE , .false., .false., dist_type, & + bounded_below, bounded_above, lower_bound, upper_bound) + +call ok(dist_type == BETA_DISTRIBUTION) +call ok(bounded_below) +call ok(bounded_above) +call ok(lower_bound == 0.0_r8) +call ok(upper_bound == 1.0_r8) + + +call end_algorithm_info_mod() + +call finalize_utilities() + +end program test_force_bounds diff --git a/developer_tests/qceff/work/input.nml b/developer_tests/qceff/work/input.nml index ffa7155436..d3109c405c 100644 --- a/developer_tests/qceff/work/input.nml +++ b/developer_tests/qceff/work/input.nml @@ -1,3 +1,7 @@ +&algorithm_info_nml + qceff_table_filename = 'qcf_force_beta_gamma.csv' +/ + &utilities_nml TERMLEVEL = 1, module_details = .false. diff --git a/developer_tests/qceff/work/input.nml.no_alg b/developer_tests/qceff/work/input.nml.no_alg new file mode 100644 index 0000000000..4b8b92d6b6 --- /dev/null +++ b/developer_tests/qceff/work/input.nml.no_alg @@ -0,0 +1,31 @@ +&utilities_nml + TERMLEVEL = 1, + module_details = .false. + logfilename = 'dart_log.out' + / + +# pick a random set of inputs +&preprocess_nml + overwrite_output = .true. + input_obs_qty_mod_file = '../../../assimilation_code/modules/observations/DEFAULT_obs_kind_mod.F90' + output_obs_qty_mod_file = '../../../assimilation_code/modules/observations/obs_kind_mod.f90' + input_obs_def_mod_file = '../../../observations/forward_operators/DEFAULT_obs_def_mod.F90' + output_obs_def_mod_file = '../../../observations/forward_operators/obs_def_mod.f90' + obs_type_files = '../../../observations/forward_operators/obs_def_reanalysis_bufr_mod.f90', + '../../../observations/forward_operators/obs_def_radar_mod.f90', + '../../../observations/forward_operators/obs_def_metar_mod.f90', + '../../../observations/forward_operators/obs_def_dew_point_mod.f90', + '../../../observations/forward_operators/obs_def_rel_humidity_mod.f90', + '../../../observations/forward_operators/obs_def_altimeter_mod.f90', + '../../../observations/forward_operators/obs_def_gps_mod.f90', + '../../../observations/forward_operators/obs_def_vortex_mod.f90', + '../../../observations/forward_operators/obs_def_gts_mod.f90', + '../../../observations/forward_operators/obs_def_QuikSCAT_mod.f90' + quantity_files = '../../../assimilation_code/modules/observations/default_quantities_mod.f90', + / + +&obs_kind_nml +/ + + + diff --git a/developer_tests/qceff/work/qcf_force_beta_gamma.csv b/developer_tests/qceff/work/qcf_force_beta_gamma.csv new file mode 100644 index 0000000000..31528d2db6 --- /dev/null +++ b/developer_tests/qceff/work/qcf_force_beta_gamma.csv @@ -0,0 +1,4 @@ +QCEFF table version: 1,obs_error_info,,,,probit_inflation,,,,,probit_state,,,,,probit_extended_state,,,,,obs_inc_info,,,, +QTY_NAME,bounded_below,bounded_above,lower_bound,upper_bound,dist_type,bounded_below,bounded_above,lower_bound,upper_bound,dist_type,bounded_below,bounded_above,lower_bound,upper_bound,dist_type,bounded_below,bounded_above,lower_bound,upper_bound,filter_kind,bounded_below,bounded_above,lower_bound,upper_bound +QTY_AQUIFER_WATER,.false.,.false.,-888888,-888888,GAMMA_DISTRIBUTION,.false.,.true.,-888888,1234,BETA_DISTRIBUTION,.false.,.false.,-888888,-888888,NORMAL_DISTRIBUTION,.false.,.false.,-888888,-888888,EAKF,.false.,.false.,-888888,-888888 +QTY_AMMONIUM_SULPHATE ,.false.,.false.,-888888,-888888,BETA_DISTRIBUTION,.false.,.true.,-888888,-145.6,GAMMA_DISTRIBUTION,.false.,.false.,45,44,BETA_DISTRIBUTION,.false.,.false.,-888888,-888888,EAKF,.false.,.false.,-888888,-888888 \ No newline at end of file diff --git a/developer_tests/qceff/work/quickbuild.sh b/developer_tests/qceff/work/quickbuild.sh index 81b1308494..24b053a36f 100755 --- a/developer_tests/qceff/work/quickbuild.sh +++ b/developer_tests/qceff/work/quickbuild.sh @@ -18,6 +18,7 @@ LOCATION="threed_sphere" serial_programs=( test_table_read +test_force_bounds ) # quickbuild arguments diff --git a/developer_tests/qceff/work/runall.sh b/developer_tests/qceff/work/runall.sh index 72597f775b..87cffcb0f5 100755 --- a/developer_tests/qceff/work/runall.sh +++ b/developer_tests/qceff/work/runall.sh @@ -41,6 +41,9 @@ else fi } +cp input.nml input.nml.bak +cp input.nml.no_alg input.nml + run_test ; should_pass "no table" run_test qcf_table.txt ; should_pass "correct v1 table" @@ -69,3 +72,4 @@ run_test all_bnrhf_qceff_table.csv ; should_pass "lower case QTY" run_test qcf_table_lower_case_dist.txt; should_pass "lower case dist_type" +cp input.nml.bak input.nml diff --git a/guide/qceff_probit.rst b/guide/qceff_probit.rst index 299dd58778..130ee21e8b 100644 --- a/guide/qceff_probit.rst +++ b/guide/qceff_probit.rst @@ -133,12 +133,16 @@ Available distributions * NORMAL_DISTRIBUTION (default) * BOUNDED_NORMAL_RH_DISTRIBUTION - * GAMMA_DISTRIBUTION - * BETA_DISTRIBUTION + * GAMMA_DISTRIBUTION (with a lower bound at 0) + * BETA_DISTRIBUTION (bound between 0 and 1) * LOG_NORMAL_DISTRIBUTION * UNIFORM_DISTRIBUTION * KDE_DISTRIBUTION +.. warning:: + + If GAMMA_DISTRIBUTION or BETA_DISTRIBUTION is selected for a quantity, the bounds in + the QCEFF table are ignored and the standard bounds for the distribution are used. .. _Default values: