feat: Introduce ivpin() function and update related documentation and…

… code - Added new ivpin() function, an improved version of the Volume-Synchronized Probability of Informed Trading (VPIN), based on Lin and Ke (2017). - Updated documentation files including README.md, NEWS.md, cran-comments.md, and vignettes/PINstimation.rmd to reflect the introduction of ivpin(). - Added new references to inst/REFERENCES.bib for the Lin and Ke (2017) paper. - Modified relevant help files in man/ (estimate.vpin-class.Rd and PINstimation-package.Rd) to include ivpin(). - Updated the NAMESPACE to export ivpin(). - Adjusted R/model_vpin.R and R/model_factorizations.R to incorporate the logic for ivpin(). - Added validation checks in R/args_validation.R and updated messages in R/utilities_messages.R. - Refactored output classes to include ivpin() support in R/output_classes.R.
monty-se · Oct 22, 2024 · dbc0fff · dbc0fff
1 parent 0737c50
commit dbc0fff
Show file tree

Hide file tree

Showing 19 changed files with 1,219 additions and 296 deletions.
diff --git a/.gitignore b/.gitignore
@@ -10,5 +10,6 @@ docs/
 rjarticle/
 _pkgdown.yml
 pkgdown/
-man/*.Rd
 vignettes/*.html
+/doc/
+/Meta/
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -18,7 +18,7 @@ LazyDataCompression: xz
 RoxygenNote: 7.3.1
 Roxygen: list(markdown = TRUE)
 VignetteBuilder: knitr
-Imports: Rdpack, knitr, methods, skellam, nloptr, furrr, future, dplyr, rmarkdown, coda
+Imports: Rdpack, knitr, methods, skellam, nloptr, furrr, future, dplyr, rmarkdown, coda, magrittr
 RdMacros: Rdpack
 Depends: R (>= 3.5.0)
 Suggests: fansi, htmltools

diff --git a/NAMESPACE b/NAMESPACE
@@ -21,6 +21,7 @@ export(initials_mpin)
 export(initials_pin_ea)
 export(initials_pin_gwj)
 export(initials_pin_yz)
+export(ivpin)
 export(mpin_ecm)
 export(mpin_ml)
 export(pin)
@@ -39,11 +40,13 @@ importFrom(coda,geweke.diag)
 importFrom(coda,mcmc)
 importFrom(dplyr,"%>%")
 importFrom(dplyr,group_by)
+importFrom(dplyr,mutate)
 importFrom(dplyr,summarize)
 importFrom(furrr,future_map)
 importFrom(future,multisession)
 importFrom(future,plan)
 importFrom(future,sequential)
+importFrom(magrittr,"%>%")
 importFrom(methods,is)
 importFrom(methods,new)
 importFrom(methods,show)
@@ -55,6 +58,7 @@ importFrom(stats,cutree)
 importFrom(stats,dist)
 importFrom(stats,dpois)
 importFrom(stats,hclust)
+importFrom(stats,lag)
 importFrom(stats,na.omit)
 importFrom(stats,optim)
 importFrom(stats,pnorm)

diff --git a/NEWS.md b/NEWS.md
@@ -4,6 +4,15 @@
 - **`adjpin()`**: The function now includes the time spent on generating initial
   parameter sets in the total time displayed in the output. This enhancement
   provides a more comprehensive view of the time taken for the entire process.
+
+- **`ivpin()`**: This function implements an improved version of the 
+Volume-Synchronized Probability of Informed Trading (VPIN) based on the work of 
+Lin and Ke (2017). By employing maximum likelihood estimation, `ivpin()` 
+enhances the stability of VPIN estimates, especially in cases with small volume
+buckets or infrequent informed trades. The function captures the information 
+embedded in volume time, generating more consistent and reliable results. It is
+designed to improve the predictability of flow toxicity in trading environments.
+
 
 ## Updates
 - **`initials_adjpin_rnd()`**: Updated the implementation for generating random

diff --git a/R/PINstimation.R b/R/PINstimation.R
@@ -173,6 +173,8 @@
 #' \insertCite{Yan2012;textual}{PINstimation}.
 #' \item \link{vpin} estimates the volume-synchronized probability of informed
 #' trading (`VPIN`).
+#' \item \link{ivpin} estimates the improved volume-synchronized probability
+#' of informed trading (`IVPIN`).
 #' }
 #'
 #' @section Datasets:

diff --git a/R/args_validation.R b/R/args_validation.R
@@ -638,7 +638,7 @@
 
     if (vn %in% c(
       "fact", "verbose", "is_parallel", "correction", "ea_correction",
-      "fullreport")) gn <- "xlogical"
+      "fullreport", "improved")) gn <- "xlogical"
 
     if (vn %in% c("algorithm", "method", "detectlayers", "factorization", "frequency")) {
       gn <- "xcharacter"

diff --git a/R/model_factorizations.R b/R/model_factorizations.R
@@ -802,6 +802,64 @@ factorizations <- list(
       return(-lkhd)
 
     }
+  },
+
+  ivpin = function(data) {
+    # Returns the factorization of the likelihood function associated with the
+    # ivpin model, evaluated at the dataset 'data', for use with optimization
+    # functions such as optim() or neldermead()
+    #
+    # Args:
+    #   data    : three columns, in order Vb, Vs and t (see Ke and Lin 2017)
+    #
+    # Returns:
+    #   a function of 'params' (length 5) giving the negative log-likelihood
+
+    function(params) {
+
+      # If 'params' is supplied with a length other than 5, return +Inf
+      # --------------------------------------------------------------
+      if (!missing(params) && length(params) != 5) return(+Inf)
+
+      # Rename the columns of 'data' and declare the parameter variables
+      # --------------------------------------------------------------
+
+      colnames(data) <- c("vb", "vs", "t")
+      a <- d <- mu <- eb <- es <- NULL
+
+      # Fetch the parameter names from .xmpin$varnames() and bind params[i] to
+      # each; presumably the names are c("a", "d", "mu", "eb", "es") - confirm
+      variables <- .xmpin$varnames()
+      for (i in 1:5) assign(variables[i], params[i])
+
+      # Construct e1, e2 and e3 (one value per row of 'data'), the log-terms
+      # weighted by a * d, a * (1 - d) and 1 - a; emax is their row-wise
+      # maximum, factored out below so that every exp() argument is <= 0.
+      # For reference, equivalent expressions with long-form parameter names:
+      # -------------------------------------------------------------
+      # e1 <- rep(log(alpha * delta), nrow(data)) + data$vb * log(eps.b) +
+      #   data$vs * log(eps.s + mu) - (eps.b + eps.s + mu) * data$t
+      # e2 <- rep(log(alpha * (1 - delta)), nrow(data)) + data$vb * log(eps.b + mu) +
+      #   data$vs * log(eps.s) - (eps.b + eps.s + mu) * data$t
+      # e3 <- rep(log(1 - alpha), nrow(data)) + data$vb * log(eps.b) +
+      #   data$vs * log(eps.s) - (eps.b + eps.s) * data$t
+      # emax <- pmax(e1, e2, e3)
+
+      e1 <- log(a * d) + data$vb * log(eb) +
+        data$vs * log(es + mu) - (eb + es + mu) * data$t
+      e2 <- log(a * (1 - d)) + data$vb * log(eb + mu) +
+        data$vs * log(es) - (eb + es  + mu) * data$t
+      e3 <- log(1 - a) + data$vb * log(eb) +
+        data$vs * log(es) - (eb + es) * data$t
+      emax <- pmax(e1, e2, e3, na.rm = TRUE)
+
+      # Compute and return the negative of the log-likelihood value
+      # --------------------------------------------------------------
+      lkhd <- - sum(log(exp(e1 - emax) + exp(e2 - emax) + exp(e3 - emax)) +
+                      emax, na.rm = TRUE)
+
+      return(lkhd)
+    }
+  }
 
 )