
Commit

Minor fixes in gcvlwls1d1; first pass at the docs of CreateOptions.
hadjipantelis committed Jul 6, 2015
1 parent 8215857 commit 0012d3a
Showing 11 changed files with 51 additions and 48 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -6,7 +6,7 @@ Date: 2015-05-22
Author: Dai, Hadjipantelis, Hao, Mueller and Wang
Maintainer: Pantelis Z. Hadjipantelis <[email protected]>
Description: PACE is a versatile package that provides implementation of various methods of Functional Data Analysis (FDA) and Empirical Dynamics. The core of this package is Functional Principal Component Analysis (FPCA), a key technique for functional data analysis, for sparsely or densely sampled random trajectories and time courses, via the Principal Analysis by Conditional Estimation (PACE) algorithm. PACE is useful for the analysis of data that have been generated by a sample of underlying (but usually not fully observed) random trajectories. It does not rely on pre-smoothing of trajectories, which is problematic if functional data are sparsely sampled. PACE provides options for functional regression and correlation, for Longitudinal Data Analysis, the analysis of stochastic processes from samples of realized trajectories, and for the analysis of underlying dynamics.
- Depends: R (>= 3.1.1), locfit, rARPACK, gtools, pracma, Hmisc, caret, plot3D, MASS
+ Depends: R (>= 3.1.1), rARPACK, gtools, pracma, Hmisc, caret, plot3D, MASS
License: BSD_3_clause
LazyData: true
Imports: Rcpp (>= 0.11.5)
60 changes: 30 additions & 30 deletions R/CreateOptions.R
@@ -1,35 +1,35 @@
#' Create the options list used by FPCA
#'
- #' @param bwcov : bandwidth value for covariance function (positive numeric - default: determine automatically based on 'bwcovGcv')
- #' @param bwcovGcv : bandwidth choice method for covariance function ('GMeanAndGCV','CV','GCV - default: 'GMeanAndGCV'')
- #' @param bwmu : bandwidth choice for mean function is using CV or GCV (positive numeric - default: determine automatically based on 'bwmuGcv')
- #' @param bwmuGcv : bandwidth choice method for mean function ('GMeanAndGCV','CV','GCV - default: 'GMeanAndGCV'')
- #' @param corrPlot : make correlation plot (logical - default: FALSE)
- #' @param corrPlotType: which type of correlation plot to show ('Fitted', 'Raw', 'Smoothed' - default : 'Fitted')
- #' @param dataType : do we have sparse or dense functional data ('Sparse', 'Dense', 'DenseWithMV', 'p>>n' - default : determine automatically based on 'IsRegular')
- #' @param designPlot : make design plot (logical - default: FALSE)
- #' @param error : assume measurement error in the dataset (logical - default: TRUE)
- #' @param FVEthreshold : Fraction-of-Variance-Explained threshold used during the SVD of the fitted covar. function ( numeric (0,1] - default: 0.9999)
- #' @param kernel : smoothing kernel choice, common for mu and covariance, ("rect", "gauss", "epan", "gausvar", "quar" - default : "epan" for dense data else "gauss")
- #' @param methodCov : method to estimate covariance ('PACE','RARE','CrossSectional' - automatically determined, user input ignored)
- #' @param methodMu : method to estimate mu ('PACE','RARE','CrossSectional' - automatically determined, user input ignored)
- #' @param maxK : maximum number of principal components to consider (positive integer - default : min(20, N-1), N : # of curves)
- #' @param method : method to estimate the PC scores. Either 'CE' (default) or 'IN'
- #' @param newdata : new data points to estimate (numeric - default : NULL )
- #' @param ntest1 : number of curves used for CV when choosing bandwidth ( [1,N] - default : min(30, N-1), N : # of curves )
- #' @param nRegGrid : number of support points in each direction of covariance surface (numeric - default : 51 )
- #' @param numBins : number of bins to bin the data into (default : NULL)
- #' @param screePlot : make scree plot (logical - default : FALSE )
- #' @param selectionMethod : the method of choosing the number of principal components K ('FVE','AIC','BIC' : default 'FVE' - only 'FVE' avaiable now)
- #' @param shrink : apply shrinkage to estimates of random coefficients (dataType data only)
- #' @param outPercent : 2-element vector in [0,1] indicating the outPercent data in the boundary default(0,1)
- #' @param rho : truncation threshold for the iterative residual. Either 'cv': choose rho by leave-one-observation out cross-validation; 'no': for not using the iterative sigma2 estimate, or a numerical value.
- #' @param rotationCut : 2-element vector in [0,1] indicating the percent of data truncated during sigma^2 estimation (default c(1/4,3/4))
- #' @param useBinnedData : 'FORCE' (Enforce the # of bins), 'AUTO' (Select the # of bins automatically), 'OFF' (Do not bin)
- #' @param useBins: testing purpose: whether to bin the same observed time points when 2D smoothing
- #' @param userCov : user-defined smoothed covariance function
- #' @param userMu : user-defined smoothed mean function
- #' @param verbose : display diagnostic messages (default = FALSE)
+ #' @param bwcov : bandwidth value for covariance function; positive numeric - default: determine automatically based on 'bwcovGcv'
+ #' @param bwcovGcv : bandwidth choice method for covariance function; 'GMeanAndGCV','CV','GCV' - default: 'GMeanAndGCV'
+ #' @param bwmu : bandwidth choice for mean function when using CV or GCV; positive numeric - default: determine automatically based on 'bwmuGcv'
+ #' @param bwmuGcv : bandwidth choice method for mean function; 'GMeanAndGCV','CV','GCV' - default: 'GMeanAndGCV'
+ #' @param corrPlot : make correlation plot; logical - default: FALSE
+ #' @param corrPlotType : which type of correlation plot to show; 'Fitted', 'Raw', 'Smoothed' - default: 'Fitted'
+ #' @param dataType : do we have sparse or dense functional data; 'Sparse', 'Dense', 'DenseWithMV', 'p>>n' - default: determine automatically based on 'IsRegular'
+ #' @param designPlot : make design plot; logical - default: FALSE
+ #' @param error : assume measurement error in the dataset; logical - default: TRUE
+ #' @param FVEthreshold : Fraction-of-Variance-Explained threshold used during the SVD of the fitted covar. function; numeric (0,1] - default: 0.9999
+ #' @param kernel : smoothing kernel choice, common for mu and covariance; "rect", "gauss", "epan", "gausvar", "quar" - default: "epan" for dense data else "gauss"
+ #' @param methodCov : method to estimate covariance; 'PACE','RARE','CrossSectional' - automatically determined, user input ignored
+ #' @param methodMu : method to estimate mu; 'PACE','RARE','CrossSectional' - automatically determined, user input ignored
+ #' @param maxK : maximum number of principal components to consider; positive integer - default: min(20, N-1), N : # of curves
+ #' @param method : method to estimate the PC scores; 'CE', 'IN' - default: 'CE'
+ #' @param newdata : new data points to estimate; numeric - default: NULL
+ #' @param ntest1 : number of curves used for CV when choosing bandwidth; [1,N] - default: min(30, N-1), N : # of curves
+ #' @param nRegGrid : number of support points in each direction of covariance surface; numeric - default: 51
+ #' @param numBins : number of bins to bin the data into; positive integer > 10, default: NULL
+ #' @param screePlot : make scree plot; logical - default: FALSE
+ #' @param selectionMethod : the method of choosing the number of principal components K; 'FVE','AIC','BIC' - default: 'FVE' (only 'FVE' available now)
+ #' @param shrink : apply shrinkage to estimates of random coefficients (dense data only); logical - default: FALSE
+ #' @param outPercent : 2-element vector in [0,1] indicating the outPercent data in the boundary - default: (0,1)
+ #' @param rho : truncation threshold for the iterative residual. 'cv': choose rho by leave-one-observation-out cross-validation; 'no': do not use the iterative sigma2 estimate; or a numerical value - default: 'cv'
+ #' @param rotationCut : 2-element vector in [0,1] indicating the percent of data truncated during sigma^2 estimation; default: (0.25, 0.75)
+ #' @param useBinnedData : 'FORCE' (Enforce the # of bins), 'AUTO' (Select the # of bins automatically), 'OFF' (Do not bin) - default: 'AUTO'
+ #' @param useBins : testing purpose: whether to bin the same observed time points when 2D smoothing; logical - default: FALSE
+ #' @param userCov : user-defined smoothed covariance function; numerical matrix - default: NULL
+ #' @param userMu : user-defined smoothed mean function; numerical vector - default: NULL
+ #' @param verbose : display diagnostic messages; logical - default: FALSE
#' @return an option list
#' @examples
#' optLst = CreateOptions(kernel='rect'); # Create options list with rectangular kernel
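The roxygen block above documents an options list consumed by FPCA. As a hedged base-R sketch of the pattern it describes (CreateOptionsSketch and the default values shown are illustrative stand-ins, not the package's actual implementation), the idea is a named list of defaults overridden by user input:

```r
# Illustrative stand-in for CreateOptions (NOT the package's real code):
# a named list of defaults, with user-supplied values overriding them.
CreateOptionsSketch <- function(...) {
  defaults <- list(kernel = NULL, corrPlot = FALSE, error = TRUE,
                   FVEthreshold = 0.9999, nRegGrid = 51, verbose = FALSE)
  user <- list(...)
  if (!all(names(user) %in% names(defaults))) {
    stop("Unknown option supplied")  # reject options not documented above
  }
  modifyList(defaults, user)
}

optLst <- CreateOptionsSketch(kernel = 'rect')  # mirrors the @examples line
```

Any option left unspecified keeps its documented default, e.g. optLst$nRegGrid stays 51 here.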
2 changes: 1 addition & 1 deletion R/FPCA.R
@@ -1,4 +1,4 @@
- #' Perform FPCA on the functional data 'y' recorderd over 'tt'. Using the options specified in 'p'
+ #' Perform FPCA on the functional data 'y' recorded over 't', using the options specified in 'p'
#'
#' @param y is an n-by-1 list of vectors
#' @param t is an n-by-1 list of vectors
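As the two @param lines note, both inputs are length-n lists of numeric vectors, one entry per curve. A minimal sketch with hypothetical toy values (two curves, sparse design, so the per-curve lengths may differ):

```r
# Toy inputs in the shape FPCA expects: length-n lists of numeric
# vectors, one (times, measurements) pair per curve.
t <- list(c(0, 1, 2), c(0.5, 1.5))        # observation times, curve by curve
y <- list(c(1.0, 1.4, 1.1), c(0.9, 1.2))  # matching measurements
lens_match <- all(sapply(y, length) == sapply(t, length))  # must be TRUE
```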
2 changes: 1 addition & 1 deletion R/SetOptions.R
@@ -88,7 +88,7 @@ SetOptions = function(y, t, optns){
shrink = FALSE;
}
if(shrink == TRUE && (error != TRUE || method != "IN")){ # Check for valid shrinkage choice
- cat('shrinkage method only had effects when method = "IN" and error = TRUE! Reset to shrink = FALSE now!\n');
+ cat('shrinkage method only has an effect when method = "IN" and error = TRUE! Reset to shrink = FALSE now!\n');
shrink = FALSE
}
if(is.null(kernel)){ # smoothing kernel choice
10 changes: 5 additions & 5 deletions R/cvlwls1d.R
@@ -63,12 +63,12 @@ cvlwls1d <- function(yy, t, kernel, npoly, nder, dataType ){

}

- if (any(win==0)){
- nz = c(win != 0)
- mu = Rlwls1d(bw= bw[j], kern=kernel, npoly=npoly, nder= nder, xin = ttn[nz], yin= xxn[nz], xout=out, win = win[nz])
- } else {
+ # if (any(win==0)){
+ #   nz = c(win != 0)
+ #   mu = Rlwls1d(bw= bw[j], kern=kernel, npoly=npoly, nder= nder, xin = ttn[nz], yin= xxn[nz], xout=out, win = win[nz])
+ # } else {
  mu = Rlwls1d(bw= bw[j], kern=kernel, npoly=npoly, nder= nder, xin = ttn, yin= xxn, xout=out, win = win)
- }
+ # }

# if invalid==0 {
cv[j]=cv[j]+t(obs-mu)%*%(obs-mu);
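The accumulation line above adds, for each left-out curve, its squared prediction error to the CV score of candidate bandwidth bw[j]. As a sketch (sse is a hypothetical helper, not in the package):

```r
# t(obs - mu) %*% (obs - mu) is just the sum of squared residuals
# between the left-out observations and the smoothed mean at their times.
sse <- function(obs, mu) drop(crossprod(obs - mu))
sse(c(1, 2, 3), c(1, 1, 2))  # 0^2 + 1^2 + 1^2 = 2
```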
4 changes: 2 additions & 2 deletions R/gcvlwls1d1.R
@@ -37,8 +37,8 @@ gcvlwls1d1 <- function(yy,tt, kernel, npoly, nder, dataType, verbose=TRUE) {
q = (r/(4*h0))^(1/9);
bwCandidates = sort(q^(0:9)*h0) ;

- idx = apply(X= sapply(X=t, FUN='==', ...=sort(unique(t)) ),MARGIN=2, FUN=which)

+ # idx = apply(X= sapply(X=t, FUN='==', ...=sort(unique(t)) ),MARGIN=2, FUN=which)
+ idx = uniq(t)$n
# This is to make sure we get the same as MATLAB PACE
# I would write them in a function (equivalent of mykernel.m) if it is worth it
# Similarly there is no reason to repeat the FOR-loop twice; this too can go into a separate function
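The candidate grid built at the top of this hunk (q = (r/(4*h0))^(1/9); bwCandidates = sort(q^(0:9)*h0)) is a 10-point geometric sequence running from the minimum bandwidth h0 up to r/4. With hypothetical values for h0 and r:

```r
# Geometric bandwidth-candidate grid, as in gcvlwls1d1 above:
# 10 points from h0 (minimum bandwidth) up to r/4 (a quarter of the range).
h0 <- 0.5                   # hypothetical minimum bandwidth
r  <- 10                    # hypothetical range of the time grid
q  <- (r / (4 * h0))^(1/9)  # common ratio, chosen so q^9 * h0 == r/4
bwCandidates <- sort(q^(0:9) * h0)
```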
2 changes: 1 addition & 1 deletion R/minb.R
@@ -7,7 +7,7 @@
# Output b: the minimum bandwidth choice for vector x

minb <- function(x, numPoints){
- x = sort(x)
+ x = sort(unique(x)); # Unique is added to ensure that we do not have a degenerate design
n = length(x);
if( (numPoints<1) || (numPoints > n) ){
warning("Invalid number of minimum points specified\n")
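The sort(unique(x)) change guards against tied design points. A sketch of why deduplication matters for a minimum-bandwidth computation (minbSketch is hypothetical, not the package's actual minb):

```r
# Hypothetical sketch of the idea behind minb(): on the deduplicated,
# sorted design, the smallest bandwidth that always covers `numPoints`
# distinct points is the widest span between a point and the point
# (numPoints - 1) places ahead of it.
minbSketch <- function(x, numPoints) {
  x <- sort(unique(x))  # without unique(), ties make the design degenerate
  n <- length(x)
  if (numPoints < 1 || numPoints > n) stop("Invalid number of minimum points")
  max(x[numPoints:n] - x[1:(n - numPoints + 1)])
}
minbSketch(c(0, 0, 1, 2, 4), 2)  # ties at 0 collapse; widest 2-point span is 2
```

Without unique(), the two tied points at 0 would count as two "distinct" neighbours and the returned bandwidth could be too small to smooth over.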
2 changes: 1 addition & 1 deletion src/Rlwls1d.cpp
@@ -58,7 +58,7 @@ Eigen::VectorXd Rlwls1d( const double & bw, const std::string kernel_type, const

// Check that we do not have zero weights // Should do a try-catch here
if ( !(win.all()) ){ //
- Rcpp::stop("Cases with zero-valued windows are not yet implemented");
+ Rcpp::warning("Cases with zero-valued windows may not be safe.");
}

// The checks end here.
2 changes: 1 addition & 1 deletion src/interp2lin.cpp
@@ -32,7 +32,7 @@ Eigen::VectorXd interp2lin( const Eigen::Map<Eigen::VectorXd> & xin, const Eigen
} else if ( xin.maxCoeff() < xou.maxCoeff() ){
Rcpp::warning("Output X-grid is outside the upper range of the input X-grid.");
} else if ( yin.maxCoeff() < you.maxCoeff() ){
- Rcpp::warning("Output X-grid is outside the upper ragne of the input X-grid.");
+ Rcpp::warning("Output Y-grid is outside the upper range of the input Y-grid.");
}


11 changes: 7 additions & 4 deletions tests/testthat/test_FVEdataset.R
@@ -1,18 +1,21 @@
devtools::load_all()




FVEdata <- read.table("http://www.hsph.harvard.edu/fitzmaur/ala2e/fev1.txt", col.names=c('SubjectID', 'Height', 'Age', 'InitialHeight', 'InitialAge', 'LogFEV1'), skip=42 );

mySample = makePACEinputs(IDs= FVEdata$SubjectID, tVec=FVEdata$Age, yVec=FVEdata$LogFEV1);

y= mySample$Ly
t= mySample$Lt

- optns = CreateOptions()
- system.time(tmp <- FPCA(y, t, optns))
- tmp$sigma2
+ # optns = CreateOptions()
+ # system.time(tmp <- FPCA(y, t, optns))
+ # tmp$sigma2


optns1 <- CreateOptions(kernel='rect')
system.time(tmp1 <- FPCA(y, t, optns1))
plot(tmp1$phi[, 1]) # off
- createCorrPlot(tmp1, 'Smoothed', TRUE)
+ createCorrPlot(tmp1, 'Smoothed', TRUE)
2 changes: 1 addition & 1 deletion tests/testthat/test_GetSmoothedMeanCurve.R
@@ -5,7 +5,7 @@ load('data/dataGeneratedByExampleSeed123.RData')
p = CreateOptions(kernel='epan')
optns = SetOptions(y,t,p)
out1 = sort(unique( c(unlist(t), optns$newdata)));
- out21 = seq(min(out1), max(out1),length.out = optns$ngrid);
+ out21 = seq(min(out1), max(out1),length.out = 30);

test_that("basic that the Epan. kernel gives the same results as MATLAB", {

