From bda91b4bd69d88fdc4dd8857c18abd30c42dea54 Mon Sep 17 00:00:00 2001 From: Rahul Saxena Date: Sun, 29 Mar 2020 00:29:53 +0530 Subject: [PATCH 1/4] Added the col extractR with corrections --- NAMESPACE | 1 + R/opt-column-extractor.R | 302 ++++++++++++++++++++ man/opt_column_extractor.Rd | 30 ++ tests/testthat/test-opt_column_extraction.R | 112 ++++++++ vignettes/opt-column-extractor.Rmd | 52 ++++ 5 files changed, 497 insertions(+) create mode 100644 R/opt-column-extractor.R create mode 100644 man/opt_column_extractor.Rd create mode 100644 tests/testthat/test-opt_column_extraction.R create mode 100644 vignettes/opt-column-extractor.Rmd diff --git a/NAMESPACE b/NAMESPACE index b19674c..5bae840 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,6 +2,7 @@ export(all_optimizers) export(max_optimizers) +export(opt_column_extractor) export(opt_common_subexpr) export(opt_constant_folding) export(opt_constant_propagation) diff --git a/R/opt-column-extractor.R b/R/opt-column-extractor.R new file mode 100644 index 0000000..acc3f28 --- /dev/null +++ b/R/opt-column-extractor.R @@ -0,0 +1,302 @@ +#' Optimizer: Efficient Column Extraction. +#' +#' Performs optimisations for column extraction throughout the code. +#' Carefully examine the results after running this function! +#' +#' @param texts A list of character vectors with the code to optimize. +#' +#' @examples +#' code <- paste ( +#' "points <- data.frame (x = rnorm (100), y = rnorm (100))", +#' "points[ , 2]", +#' "#This is a comment!!", +#' "poimts [[2]]", +#' "points$y", +#' "yo <- 1", +#' "yo", +#' "points[[c (2)]]", +#' ".subset2 (mtcars, 2)", +#' sep = "\n" +#' ) +#' cat (opt_column_extractor (list (code))$codes[[1]]) +#' @export +#' +opt_column_extractor <- function (texts) { + res <- list () + res$codes <- lapply (texts, ce_one_file) + res +} + +# Executes efficient column selection/extraction on one text of code. +# +# @param texts A character vector with code to optimize. +# +ce_one_file <- function (texts){ + pd <- parse_text (texts) + res_fpd <- ce_one_fpd (pd) + deparse_data (res_fpd) +} + +# Executes searching operations to find dollar sign. +# +# @param fpd A flatten parsed data data.frame. +# @param node_id Id of the node to be examined +# +has_dollar_sign <- function (fpd, node_id) { + "'$'" %in% fpd[fpd$parent == node_id, ]$token +} + + +# Executes searching operations related to dollar sign to find dataset. +# +# @param fpd A flatten parsed data data.frame. +# @param node_id Id of the node to be examined +# +dollar_dataset <- function (fpd, node_id) { + open_id <- fpd[fpd$parent == node_id & fpd$token == "'$'", "pos_id"] + fpd[fpd$parent == node_id & fpd$token == "SYMBOL" & + fpd$pos_id < open_id, "text"] +} + + +# Executes searching operations related to dollar sign to find column number. +# +# @param fpd A flatten parsed data data.frame. +# @param node_id Id of the node to be examined +# +dollar_colname <- function (fpd, node_id) { + open_id <- fpd[fpd$parent == node_id & fpd$token == "'$'", "pos_id"] + fpd[fpd$parent == node_id & fpd$token == "SYMBOL" & + fpd$pos_id > open_id, ]$text +} + +# Executes searching operations to find double square brackets (LBB). +# +# @param fpd A flatten parsed data data.frame. +# @param node_id Id of the node to be examined +# +has_square_brackets <- function (fpd, node_id) { + "LBB" %in% fpd[fpd$parent == node_id, ]$token +} + +# Executes searching operations related to double square brackets (LBB) to find dataset. +# +# @param fpd A flatten parsed data data.frame. +# @param node_id Id of the node to be examined +# +square_dataset <- function (fpd, node_id) { + node_children <- fpd[fpd$parent == node_id, ] + open_id <- node_children[node_children$token == "LBB", "pos_id"] + node_children[node_children$token == "SYMBOL" & + (node_children$pos_id < open_id), "text"] +} + +# Executes searching operations related to double square brackets (LBB) to find column number. +# +# @param fpd A flatten parsed data data.frame. +# @param node_id Id of the node to be examined +# +square_colnum <- function (fpd, node_id) { + node_children <- fpd[fpd$parent == node_id, ] + open_id <- node_children[node_children$token == "LBB", "pos_id"] + close_id <- max(node_children[node_children$token == "']'", "pos_id"]) + node_children[node_children$pos_id > open_id & node_children$pos_id < close_id + & (node_children$token == "NUM_CONST" | node_children$token == "expr"| + node_children$token == "SYMBOL"), "text"] +} + +# Executes searching operations to find single square brackets. +# +# @param fpd A flatten parsed data data.frame. +# @param node_id Id of the node to be examined +# +has_single_bracket <- function (fpd, node_id) { + "'['" %in% fpd[fpd$parent == node_id, ]$token +} + + +# Executes searching operations related to single square bracket to find dataset. +# +# @param fpd A flatten parsed data data.frame. +# @param node_id Id of the node to be examined +# +single_dataset <- function (fpd, node_id) { + node_children <- fpd[fpd$parent == node_id, ] + open_id <- node_children[node_children$token == "'['", "pos_id"] + node_children[node_children$token == "SYMBOL" & node_children$pos_id < open_id, "text"] +} + + +# Executes searching operations related to single square brackets to find column number. +# +# @param fpd A flatten parsed data data.frame. +# @param node_id Id of the node to be examined +# +single_colnum <- function (fpd, node_id) { + node_children <- fpd[fpd$parent == node_id, ] + open_id <- node_children[node_children$token == "'['", "pos_id"] + close_id <- node_children[node_children$token == "']'", "pos_id"] + node_children[node_children$pos_id > open_id & node_children$pos_id < close_id + & (node_children$token == "NUM_CONST" | node_children$token == "expr"| + node_children$token == "SYMBOL"), "text"] +} + + +# Executes efficient column selection/extraction of a fpd. +# +# @param fpd A flatten parsed data data.frame. +# +ce_one_fpd <- function (pd) { + fpd <- flatten_leaves (pd) + fun_exam_nodes <- NULL + + fun_ids <- fpd$id[fpd$token == "FUNCTION"] + fun_prnt_ids <- fpd$parent[fpd$id %in% fun_ids] + for (i in fun_prnt_ids){ + fun_exam_nodes <- rbind (fun_exam_nodes, get_children (fpd, i, FALSE)[get_children (fpd, i, FALSE)$token == "expr", ]) + } + exam_nodes <- get_roots (pd) + exam_nodes <- rbind (exam_nodes, fun_exam_nodes) + + final_exam_nodes <- NULL + not_to_edit <- NULL + column_name <- c (NA); length (column_name) <- length (exam_nodes$id) + data_frame <- c (NA); length (data_frame) <- length (exam_nodes$id) + method_used <- c (NA); length (method_used) <- length (exam_nodes$id) + + for (i in seq_len (nrow (exam_nodes))) { + test_id <- exam_nodes[i, ]$id + if (has_dollar_sign (fpd, test_id)) { + data_frame <- append (data_frame, dollar_dataset (fpd, test_id)) + column_name <- append (column_name, dollar_colname (fpd, test_id)) + final_exam_nodes <- rbind (final_exam_nodes, exam_nodes[i, ]) + method_used <- append (method_used, "$") + } else if (has_square_brackets (fpd, test_id) ) { + data_frame <- append (data_frame, square_dataset (fpd, test_id)) + column_name <- append (column_name, square_colnum (fpd, test_id)) + final_exam_nodes <- rbind (final_exam_nodes, exam_nodes[i, ]) + method_used <- append (method_used, "LBB") + } else if (has_single_bracket (fpd, test_id) ) { + data_frame <- append (data_frame, single_dataset (fpd, test_id)) + column_name <- append (column_name, single_colnum (fpd, test_id)) + final_exam_nodes <- rbind (final_exam_nodes, exam_nodes[i, ]) + method_used <- append (method_used, "[") + } else { + not_to_edit <- rbind (not_to_edit, exam_nodes[i, ]) + } + } + + column_name <- column_name[!is.na (column_name)] + data_frame <- data_frame[!is.na (data_frame)] + method_used <- method_used[!is.na (method_used)] + + for (i in seq_len (nrow (final_exam_nodes))) { + if (! (method_used[i] == "$")) { + #Replacing column extraction methods with .subset2 + final_exam_nodes[i, "text"] <- sprintf (".subset2 (%s, %s)", data_frame[i], column_name[i]) + }else { + #What we are trying to do here: .subset2 (mtcars, which (colnames (mtcars) == "mpg")) + final_exam_nodes[i, "text"] <- sprintf (".subset2 (%s, which (colnames (%s) == \"%s\"))", data_frame[i], data_frame[i], column_name[i]) + } + } + + for (i in seq_len (nrow (not_to_edit))) { + not_to_edit <- rbind (not_to_edit, get_children (pd, not_to_edit[i, "id"])) + } + + not_to_edit <- not_to_edit[order (not_to_edit$pos_id), ] + #Removing entries with duplicate IDs. + not_to_edit_final <- NULL + for (i in unique (not_to_edit$id)) { + not_to_edit_final <- rbind (not_to_edit_final, unique (not_to_edit[not_to_edit$id == i, ])) + } + #These ids will be used for correction of new_lines after the substitution + final_exam_nodes_ids <- final_exam_nodes$id + + new_fpd <- NULL + for (itr in seq_len (nrow (final_exam_nodes))) + { + act_fpd <- final_exam_nodes[itr, ] + new_act_fpd <- parse_text (act_fpd$text) + + #Setting new ids for the newly edited and parsed codes + new_act_fpd$id <- paste0 (act_fpd$id, "_", new_act_fpd$id) + + #Keeping old parents for new fpd + new_act_fpd$parent[new_act_fpd$parent != 0] <- paste0 (act_fpd$id, "_", + new_act_fpd$parent[new_act_fpd$parent != 0]) + new_act_fpd$parent[new_act_fpd$parent == 0] <- act_fpd$parent + + #Calling a pre-wriiten rco::function.... + new_act_fpd$pos_id <- create_new_pos_id (act_fpd, nrow (new_act_fpd), act_fpd$id) + + #Fixing the next_spaces section of new_fpd + new_act_fpd$next_spaces[nrow (new_act_fpd)] <- act_fpd$next_spaces + + #Fixing the next_lines section of new_fpd + new_act_fpd$next_lines[nrow (new_act_fpd)] <- act_fpd$next_lines + + #Fixing the prev_spaces section of new_fpd + new_act_fpd$prev_spaces[which (new_act_fpd$terminal)[[1]]] <- act_fpd$prev_spaces + + #Merging the new_fpd and the act_fpd (obtained upon iteration) + new_fpd <- rbind (new_fpd, new_act_fpd) + + #Ordering the new_fpd according to the pos_id + new_fpd <- new_fpd[order (new_fpd$pos_id), ] + } + + resultant_fpd <- rbind (not_to_edit_final, new_fpd) + resultant_fpd <- resultant_fpd[order (resultant_fpd$pos_id), ] + + test_fpd <- NULL + test_fpd <- flatten_leaves (resultant_fpd) + + comments_ids <- NULL + curr_new_line_ids <- NULL + next_line_ids <- NULL + correction_nodes <- NULL + correction_ids <- NULL + correction_nodes <- test_fpd[test_fpd$parent == 0 & is.na (test_fpd$next_spaces) + & is.na (test_fpd$next_lines) & is.na (test_fpd$prev_spaces), ] + + for (i in correction_nodes$id){ + correction_ids <- append (correction_ids, which (test_fpd$id == i, arr.ind = TRUE)) + } + + test_fpd[which (is.na (test_fpd$next_lines)), "next_lines"] <- 0 + test_fpd[which (is.na (test_fpd$next_spaces)), "next_spaces"] <- 0 + test_fpd[which (is.na (test_fpd$prev_spaces)), "prev_spaces"] <- 0 + + comments_ids <- which (test_fpd$parent < 0) + correction_ids <- append (correction_ids, comments_ids) + for (i in correction_ids){ + if ( (i-1) > 0){ + test_fpd[i-1, "next_lines"] <- 1 + }else{ + next + } + } + test_fpd <- test_fpd[! (test_fpd$parent == 0 & test_fpd$terminal == FALSE), ] + deletion_nodes <- NULL + for (i in final_exam_nodes_ids){ + if (i %in% test_fpd$id){ + deletion_nodes <- rbind (deletion_nodes, get_children (test_fpd, i)) + } + } + next_line_ids <- as.double (test_fpd[test_fpd$id %in% deletion_nodes$id + & test_fpd$next_lines == 1, "pos_id"]) + test_fpd <- test_fpd[! (test_fpd$id %in% deletion_nodes$id), ] + for (i in next_line_ids){ + curr_new_line_ids <- append (curr_new_line_ids, + max (which (test_fpd$pos_id < i))) + } + for (i in curr_new_line_ids) { + test_fpd[i, ]$next_lines <- 1 + } + + test_fpd +} + + + diff --git a/man/opt_column_extractor.Rd b/man/opt_column_extractor.Rd new file mode 100644 index 0000000..cdb9663 --- /dev/null +++ b/man/opt_column_extractor.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/opt-column-extractor.R +\name{opt_column_extractor} +\alias{opt_column_extractor} +\title{Optimizer: Efficient Column Extraction.} +\usage{ +opt_column_extractor(texts) +} +\arguments{ +\item{texts}{A list of character vectors with the code to optimize.} +} +\description{ +Performs optimisations for column extraction throughout the code. +Carefully examine the results after running this function! +} +\examples{ +code <- paste ( + "points <- data.frame (x = rnorm (100), y = rnorm (100))", + "points[ , 2]", + "#This is a comment!!", + "poimts [[2]]", + "points$y", + "yo <- 1", + "yo", + "points[[c (2)]]", + ".subset2 (mtcars, 2)", + sep = "\n" +) +cat (opt_column_extractor (list (code))$codes[[1]]) +} diff --git a/tests/testthat/test-opt_column_extraction.R b/tests/testthat/test-opt_column_extraction.R new file mode 100644 index 0000000..987e863 --- /dev/null +++ b/tests/testthat/test-opt_column_extraction.R @@ -0,0 +1,112 @@ +context("opt_column_extractor") + +test_that("Replace function call with system call for all general cases", { + code <- paste("mtcars[ , 11]", + "#This is a comment!!", + "mtcars [[11]]", + "mtcars$carb", + "yo <- 1", + "yo", + "mtcars[[c(11)]]", + ".subset2(mtcars, 11)", + sep = "\n") + opt_code <- opt_column_extractor(list(code))$codes[[1]] + expect_equal(opt_code, paste( + ".subset2 (mtcars, 11)", + "#This is a comment!!", + ".subset2 (mtcars, 11)", + ".subset2 (mtcars, which (colnames (mtcars) == \"carb\"))", + "yo <- 1", + "yo", + ".subset2 (mtcars, c(11))", + ".subset2(mtcars, 11)", + sep = "\n" + )) +}) + +test_that("Testing on user-created datasets", { + code <- paste("points <- data.frame(x = rnorm(100), y = rnorm(100))", + "points[ ,2]", + "rnorm(100)", + "#This is a comment!!", + "points [[2]]", + "points$y", + "yo <- 1", + "yo", + "points[[c(2)]]", + ".subset2(points, 2)", + sep = "\n") + opt_code <- opt_column_extractor(list(code))$codes[[1]] + expect_equal(opt_code, paste( + "points <- data.frame(x = rnorm(100), y = rnorm(100))", + ".subset2 (points, 2)", + "rnorm(100)", + "#This is a comment!!", + ".subset2 (points, 2)", + ".subset2 (points, which (colnames (points) == \"y\"))", + "yo <- 1", + "yo", + ".subset2 (points, c(2))", + ".subset2(points, 2)", + sep = "\n" + )) +}) + +test_that("A code snippet consisting of only a function", { + code <- paste( + "foo <- function(x, n){", + "print(x)", + "rnorm(100)", + "x[ ,n]", + "x$n", + "y <- list(A = 1, C = 3)", + "y[2]", + "}", + sep = "\n" + ) + opt_code <- opt_column_extractor(list(code))$codes[[1]] + expect_equal(opt_code, paste( + "foo <- function(x, n){", + "print(x)", + "rnorm(100)", + ".subset2 (x, n)", + ".subset2 (x, which (colnames (x) == \"n\"))", + "y <- list(A = 1, C = 3)", + ".subset2 (y, 2)", + "}", + sep = "\n" + )) +}) + +test_that("Code snippet containing normal and function code", { + code <- paste( + "custom <- matrix(1:9, nrow = 3, ncol = 3)", + "#I am Mr.Comment, y'all!!", + "mtcars[ ,5]", + "foo <- function(x, n){", + "x[ ,n]", + "}", + "custom[ ,2]", + "milky_moo <- function(obj){", + "print(\"obj\")", + "}", + "random_variable", + sep = "\n" + ) + opt_code <- opt_column_extractor(list(code))$codes[[1]] + expect_equal(opt_code, paste( + "custom <- matrix(1:9, nrow = 3, ncol = 3)", + "#I am Mr.Comment, y'all!!", + ".subset2 (mtcars, 5)", + "foo <- function(x, n){", + ".subset2 (x, n)", + "}", + ".subset2 (custom, 2)", + "milky_moo <- function(obj){", + "print(\"obj\")", + "}", + "random_variable", + sep = "\n" + )) +}) + diff --git a/vignettes/opt-column-extractor.Rmd b/vignettes/opt-column-extractor.Rmd new file mode 100644 index 0000000..0703f5e --- /dev/null +++ b/vignettes/opt-column-extractor.Rmd @@ -0,0 +1,52 @@ +--- +output: rmarkdown::html_vignette +title: Efficient Column Extraction +vignette: > + %\VignetteIndexEntry{Dead Code Elimination} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r setup, include=FALSE} +# Efficient Column Extraction +knitr::opts_chunk$set(echo = TRUE) +``` + +## Background + +Column extraction refers to the act of pulling out the values of a particular column from a given data frame/list/matrix. The below microbenchmark plot clearly shows the vast difference in execution time between `.subset2()` and other methods of column extraction. Therefore, it would make sense to replace other column extraction methods with `.subset2()` since it is a function call while all others are function calls. + +```{r Microbenchmark plot, echo=FALSE, message=FALSE, warning=FALSE} +library("microbenchmark") +library("ggplot2") +autoplot(microbenchmark( + mtcars[,11], + mtcars$carb, + mtcars[[c(11)]], + mtcars[[11]], + .subset2(mtcars, 11) +)) +``` + +For example, consider the following code: + +```{r raw_code, eval=FALSE} + mtcars[,11] + mtcars$carb + replace_text <- c(1,2,3,4,5) + .subset2(mtcars, 11) +``` + +There is nothing wrong with the above code, but as noticed earlier it can be made efficient, if we replace the given column extraction methods with system calls(`.subset2()`). That is, it can be made more efficient in the following way: + +```{r opt_code, eval=FALSE} + .subset2(mtcars, 11) + .subset2(mtcars, 11) + replace_text <- c(1,2,3,4,5) + .subset2(mtcars, 11) + +``` + +## Implementation + +The `opt_column_extractor` function of the `rco` achieves this by, parsing the given code snippet, detecting the usage of column extraction methods/techniques and replacing it with the most appropriate one. \ No newline at end of file From 653e6c092a68a663dda8c10800c2b687904a5927 Mon Sep 17 00:00:00 2001 From: Rahul Saxena Date: Sun, 29 Mar 2020 00:54:08 +0530 Subject: [PATCH 2/4] Made changes in vignette --- vignettes/opt-column-extractor.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/opt-column-extractor.Rmd b/vignettes/opt-column-extractor.Rmd index 0703f5e..7d7a2b1 100644 --- a/vignettes/opt-column-extractor.Rmd +++ b/vignettes/opt-column-extractor.Rmd @@ -2,7 +2,7 @@ output: rmarkdown::html_vignette title: Efficient Column Extraction vignette: > - %\VignetteIndexEntry{Dead Code Elimination} + %\VignetteIndexEntry{Efficient Column Extraction} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- From 0bbe11be0615c51395f9a54b5471ffa62fea25ab Mon Sep 17 00:00:00 2001 From: Rahul Saxena Date: Mon, 30 Mar 2020 04:09:10 +0530 Subject: [PATCH 3/4] Possible fix to Issue #31 --- R/opt-dead-code.R | 63 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/R/opt-dead-code.R b/R/opt-dead-code.R index 177e5cd..df2435b 100644 --- a/R/opt-dead-code.R +++ b/R/opt-dead-code.R @@ -44,6 +44,26 @@ dc_one_file <- function(text) { # @param fpd A flatten parsed data data.frame. # dc_one_fpd <- function(fpd) { + + Fun_def_ids <- fpd[fpd$token == "FUNCTION", "parent"] + + #check if function bodies have never assigned vars + if(length(Fun_def_ids) > 0) + { + i <- 1 + for (i in 1:length(Fun_def_ids)) { + Flag <- Is_Var_Assigned(fpd, Fun_def_ids[[i]]) + if(Flag[1] == TRUE) + next + else { + warning("Please remove the unassigned variable or assign value. It may lead to errors.") + k <- 3 + for(k in 3:(as.numeric(Flag[2])+1)){ + print(Flag[k]) + } + } + } + } # first remove code that is after (and equally nested) next, break, or return new_fpd <- remove_after_interruption(fpd) @@ -288,3 +308,46 @@ unindent_fpd <- function(fpd, parent_spaces) { fpd[fpd$id %in% new_line_ids, "prev_spaces"] - prnt_diff fpd } + + +Is_Var_Assigned <- function(fpd, id) +{ + Fun_Ids <- sapply(id, function(act_id){ + utils::tail(fpd$id[fpd$parent == act_id & fpd$token == "expr"], 1) + }) + + act_fpd <- get_children(fpd, Fun_Ids) + + + Checklist_expr <- NULL + Checklist <- NULL + Checklist_var <- act_fpd[act_fpd$token == "SYMBOL" & act_fpd$next_lines == 1 & act_fpd$parent == Fun_Ids, ] + expr_ids <- act_fpd[act_fpd$parent == Fun_Ids, ]$id + j <- 1 + assignment_exprs <- c("LEFT_ASSIGN", "RIGHT_ASSIGN", "EQ_ASSIGN") + sys_call <- c("SYMBOL_FUNCTION_CALL") + for(j in seq_len(length(expr_ids))){ + test_id <- expr_ids[j] + if(length(act_fpd[act_fpd$parent == test_id, ]$id) > 0) + { + if(!any(act_fpd[act_fpd$parent == test_id, ]$token %in% assignment_exprs) & !any(act_fpd[act_fpd$parent == test_id, ]$token %in% sys_call)){ + Checklist_expr <- rbind(Checklist_expr, act_fpd[act_fpd$id == test_id, ]) + } + } + } + Checklist <- rbind(Checklist_expr, Checklist_var) + Check_Flag <- NULL + if(length(Checklist$id) > 0) + { + itr <- NULL + Check_Flag <- FALSE + for(itr in 1:length(Checklist$id)) + { + Check_Flag <- append(Check_Flag, sprintf("Function: %s Variable: %s", (fpd[fpd$parent == fpd[fpd$id == act_fpd[1, ]$parent, ]$parent & fpd$token == "SYMBOL", ]$text), (Checklist[itr, ]$text))) + } + Check_Flag <- append(Check_Flag, length(Check_Flag), 1) + } + else + Check_Flag <- TRUE + return (Check_Flag) +} \ No newline at end of file From c81f55fee179a051540f9ba60881b07737d81267 Mon Sep 17 00:00:00 2001 From: Rahul Saxena Date: Mon, 30 Mar 2020 04:26:27 +0530 Subject: [PATCH 4/4] Possible fix to issue #63 --- R/opt-dead-expr.R | 67 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 66 insertions(+), 1 deletion(-) diff --git a/R/opt-dead-expr.R b/R/opt-dead-expr.R index 5603140..a095411 100644 --- a/R/opt-dead-expr.R +++ b/R/opt-dead-expr.R @@ -48,10 +48,28 @@ de_one_file <- function(text) { # de_one_pd <- function(pd) { res_pd <- pd - + fpd <- flatten_leaves(pd) # exprs in functions dont have any effect. However, on the global env they # print on console, so just analyze function definitions fun_def_ids <- pd[pd$token == "FUNCTION", "parent"] + + #check if function bodies have never assigned vars + if(length(fun_def_ids) > 0) { + i <- 1 + for ( i in 1:length(fun_def_ids)) { + flag <- is_var_assigned(fpd, fun_def_ids[[i]]) + if(flag[1] == TRUE) + next + else { + warning("Please remove the unassigned variable or assign value. It may lead to errors.") + k <- 3 + for(k in 3:(as.numeric(flag[2])+1)){ + print(flag[k]) + } + } + } + } + # get unassigned expressions dead_exprs_ids <- unlist(lapply(fun_def_ids, function(act_id) { @@ -162,3 +180,50 @@ get_fun_last_exprs <- function(pd, id) { # returns last exprs and their children ids get_children(pd, last_exprs_ids)$id } + +# Checks whether some value is being assigned to all var. +# +# @param fpd A parsed data data.frame. +# @param id A numeric indicating the node ID of the function to search for +# unassigned expressions. +# +is_var_assigned <- function(fpd, id) { + fun_ids <- sapply(id, function(act_id){ + utils::tail(fpd$id[fpd$parent == act_id & fpd$token == "expr"], 1) + }) + + act_fpd <- get_children(fpd, fun_ids) + + + checklist_expr <- NULL + checklist <- NULL + checklist_var <- act_fpd[act_fpd$token == "SYMBOL" & act_fpd$next_lines == 1 & + act_fpd$parent == fun_ids, ] + expr_ids <- act_fpd[act_fpd$parent == fun_ids, ]$id + j <- 1 + assignment_exprs <- c("LEFT_ASSIGN", "RIGHT_ASSIGN", "EQ_ASSIGN") + sys_call <- c("SYMBOL_FUNCTION_CALL") + for(j in seq_len(length(expr_ids))){ + test_id <- expr_ids[j] + if (length(act_fpd[act_fpd$parent == test_id, ]$id) > 0) { + if(!any(act_fpd[act_fpd$parent == test_id, ]$token %in% assignment_exprs) & + !any(act_fpd[act_fpd$parent == test_id, ]$token %in% sys_call)){ + checklist_expr <- rbind(checklist_expr, act_fpd[act_fpd$id == test_id, ]) + } + } + } + checklist <- rbind(checklist_expr, checklist_var) + check_flag <- NULL + if (length(checklist$id) > 0) { + itr <- NULL + check_flag <- FALSE + for (itr in 1:length(checklist$id)) { + check_flag <- append(check_flag, sprintf("Function: %s Variable: %s", (fpd[fpd$parent == fpd[fpd$id == act_fpd[1, ]$parent, ]$parent & fpd$token == "SYMBOL", ]$text), (checklist[itr, ]$text))) + } + check_flag <- append(check_flag, length(check_flag), 1) + } + else + check_flag <- TRUE + check_flag +} +