Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Possible fix for Issue #63 #154

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

export(all_optimizers)
export(max_optimizers)
export(opt_column_extractor)
export(opt_common_subexpr)
export(opt_constant_folding)
export(opt_constant_propagation)
Expand Down
302 changes: 302 additions & 0 deletions R/opt-column-extractor.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,302 @@
#' Optimizer: Efficient Column Extraction.
#'
#' Performs optimisations for column extraction throughout the code.
#' Carefully examine the results after running this function!
#'
#' @param texts A list of character vectors with the code to optimize.
#'
#' @examples
#' code <- paste (
#' "points <- data.frame (x = rnorm (100), y = rnorm (100))",
#' "points[ , 2]",
#' "#This is a comment!!",
#' "poimts [[2]]",
#' "points$y",
#' "yo <- 1",
#' "yo",
#' "points[[c (2)]]",
#' ".subset2 (mtcars, 2)",
#' sep = "\n"
#' )
#' cat (opt_column_extractor (list (code))$codes[[1]])
#' @export
#'
opt_column_extractor <- function (texts) {
res <- list ()
res$codes <- lapply (texts, ce_one_file)
res
}

# Executes efficient column selection/extraction on one text of code.
#
# @param texts A character vector with code to optimize.
#
ce_one_file <- function (texts){
pd <- parse_text (texts)
res_fpd <- ce_one_fpd (pd)
deparse_data (res_fpd)
}

# Executes searching operations to find dollar sign.
#
# @param fpd A flatten parsed data data.frame.
# @param node_id Id of the node to be examined
#
has_dollar_sign <- function (fpd, node_id) {
"'$'" %in% fpd[fpd$parent == node_id, ]$token
}


# Executes searching operations related to dollar sign to find dataset.
#
# @param fpd A flatten parsed data data.frame.
# @param node_id Id of the node to be examined
#
dollar_dataset <- function (fpd, node_id) {
open_id <- fpd[fpd$parent == node_id & fpd$token == "'$'", "pos_id"]
fpd[fpd$parent == node_id & fpd$token == "SYMBOL" &
fpd$pos_id < open_id, "text"]
}


# Executes searching operations related to dollar sign to find column number.
#
# @param fpd A flatten parsed data data.frame.
# @param node_id Id of the node to be examined
#
dollar_colname <- function (fpd, node_id) {
open_id <- fpd[fpd$parent == node_id & fpd$token == "'$'", "pos_id"]
fpd[fpd$parent == node_id & fpd$token == "SYMBOL" &
fpd$pos_id > open_id, ]$text
}

# Executes searching operations to find double square brackets (LBB).
#
# @param fpd A flatten parsed data data.frame.
# @param node_id Id of the node to be examined
#
has_square_brackets <- function (fpd, node_id) {
"LBB" %in% fpd[fpd$parent == node_id, ]$token
}

# Executes searching operations related to double square brackets (LBB) to find dataset.
#
# @param fpd A flatten parsed data data.frame.
# @param node_id Id of the node to be examined
#
square_dataset <- function (fpd, node_id) {
node_children <- fpd[fpd$parent == node_id, ]
open_id <- node_children[node_children$token == "LBB", "pos_id"]
node_children[node_children$token == "SYMBOL" &
(node_children$pos_id < open_id), "text"]
}

# Executes searching operations related to double square brackets (LBB) to find column number.
#
# @param fpd A flatten parsed data data.frame.
# @param node_id Id of the node to be examined
#
square_colnum <- function (fpd, node_id) {
node_children <- fpd[fpd$parent == node_id, ]
open_id <- node_children[node_children$token == "LBB", "pos_id"]
close_id <- max(node_children[node_children$token == "']'", "pos_id"])
node_children[node_children$pos_id > open_id & node_children$pos_id < close_id
& (node_children$token == "NUM_CONST" | node_children$token == "expr"|
node_children$token == "SYMBOL"), "text"]
}

# Executes searching operations to find single square brackets.
#
# @param fpd A flatten parsed data data.frame.
# @param node_id Id of the node to be examined
#
has_single_bracket <- function (fpd, node_id) {
"'['" %in% fpd[fpd$parent == node_id, ]$token
}


# Executes searching operations related to single square bracket to find dataset.
#
# @param fpd A flatten parsed data data.frame.
# @param node_id Id of the node to be examined
#
single_dataset <- function (fpd, node_id) {
node_children <- fpd[fpd$parent == node_id, ]
open_id <- node_children[node_children$token == "'['", "pos_id"]
node_children[node_children$token == "SYMBOL" & node_children$pos_id < open_id, "text"]
}


# Executes searching operations related to single square brackets to find column number.
#
# @param fpd A flatten parsed data data.frame.
# @param node_id Id of the node to be examined
#
single_colnum <- function (fpd, node_id) {
node_children <- fpd[fpd$parent == node_id, ]
open_id <- node_children[node_children$token == "'['", "pos_id"]
close_id <- node_children[node_children$token == "']'", "pos_id"]
node_children[node_children$pos_id > open_id & node_children$pos_id < close_id
& (node_children$token == "NUM_CONST" | node_children$token == "expr"|
node_children$token == "SYMBOL"), "text"]
}


# Executes efficient column selection/extraction of a fpd.
#
# @param fpd A flatten parsed data data.frame.
#
ce_one_fpd <- function (pd) {
fpd <- flatten_leaves (pd)
fun_exam_nodes <- NULL

fun_ids <- fpd$id[fpd$token == "FUNCTION"]
fun_prnt_ids <- fpd$parent[fpd$id %in% fun_ids]
for (i in fun_prnt_ids){
fun_exam_nodes <- rbind (fun_exam_nodes, get_children (fpd, i, FALSE)[get_children (fpd, i, FALSE)$token == "expr", ])
}
exam_nodes <- get_roots (pd)
exam_nodes <- rbind (exam_nodes, fun_exam_nodes)

final_exam_nodes <- NULL
not_to_edit <- NULL
column_name <- c (NA); length (column_name) <- length (exam_nodes$id)
data_frame <- c (NA); length (data_frame) <- length (exam_nodes$id)
method_used <- c (NA); length (method_used) <- length (exam_nodes$id)

for (i in seq_len (nrow (exam_nodes))) {
test_id <- exam_nodes[i, ]$id
if (has_dollar_sign (fpd, test_id)) {
data_frame <- append (data_frame, dollar_dataset (fpd, test_id))
column_name <- append (column_name, dollar_colname (fpd, test_id))
final_exam_nodes <- rbind (final_exam_nodes, exam_nodes[i, ])
method_used <- append (method_used, "$")
} else if (has_square_brackets (fpd, test_id) ) {
data_frame <- append (data_frame, square_dataset (fpd, test_id))
column_name <- append (column_name, square_colnum (fpd, test_id))
final_exam_nodes <- rbind (final_exam_nodes, exam_nodes[i, ])
method_used <- append (method_used, "LBB")
} else if (has_single_bracket (fpd, test_id) ) {
data_frame <- append (data_frame, single_dataset (fpd, test_id))
column_name <- append (column_name, single_colnum (fpd, test_id))
final_exam_nodes <- rbind (final_exam_nodes, exam_nodes[i, ])
method_used <- append (method_used, "[")
} else {
not_to_edit <- rbind (not_to_edit, exam_nodes[i, ])
}
}

column_name <- column_name[!is.na (column_name)]
data_frame <- data_frame[!is.na (data_frame)]
method_used <- method_used[!is.na (method_used)]

for (i in seq_len (nrow (final_exam_nodes))) {
if (! (method_used[i] == "$")) {
#Replacing column extraction methods with .subset2
final_exam_nodes[i, "text"] <- sprintf (".subset2 (%s, %s)", data_frame[i], column_name[i])
}else {
#What we are trying to do here: .subset2 (mtcars, which (colnames (mtcars) == "mpg"))
final_exam_nodes[i, "text"] <- sprintf (".subset2 (%s, which (colnames (%s) == \"%s\"))", data_frame[i], data_frame[i], column_name[i])
}
}

for (i in seq_len (nrow (not_to_edit))) {
not_to_edit <- rbind (not_to_edit, get_children (pd, not_to_edit[i, "id"]))
}

not_to_edit <- not_to_edit[order (not_to_edit$pos_id), ]
#Removing entries with duplicate IDs.
not_to_edit_final <- NULL
for (i in unique (not_to_edit$id)) {
not_to_edit_final <- rbind (not_to_edit_final, unique (not_to_edit[not_to_edit$id == i, ]))
}
#These ids will be used for correction of new_lines after the substitution
final_exam_nodes_ids <- final_exam_nodes$id

new_fpd <- NULL
for (itr in seq_len (nrow (final_exam_nodes)))
{
act_fpd <- final_exam_nodes[itr, ]
new_act_fpd <- parse_text (act_fpd$text)

#Setting new ids for the newly edited and parsed codes
new_act_fpd$id <- paste0 (act_fpd$id, "_", new_act_fpd$id)

#Keeping old parents for new fpd
new_act_fpd$parent[new_act_fpd$parent != 0] <- paste0 (act_fpd$id, "_",
new_act_fpd$parent[new_act_fpd$parent != 0])
new_act_fpd$parent[new_act_fpd$parent == 0] <- act_fpd$parent

#Calling a pre-wriiten rco::function....
new_act_fpd$pos_id <- create_new_pos_id (act_fpd, nrow (new_act_fpd), act_fpd$id)

#Fixing the next_spaces section of new_fpd
new_act_fpd$next_spaces[nrow (new_act_fpd)] <- act_fpd$next_spaces

#Fixing the next_lines section of new_fpd
new_act_fpd$next_lines[nrow (new_act_fpd)] <- act_fpd$next_lines

#Fixing the prev_spaces section of new_fpd
new_act_fpd$prev_spaces[which (new_act_fpd$terminal)[[1]]] <- act_fpd$prev_spaces

#Merging the new_fpd and the act_fpd (obtained upon iteration)
new_fpd <- rbind (new_fpd, new_act_fpd)

#Ordering the new_fpd according to the pos_id
new_fpd <- new_fpd[order (new_fpd$pos_id), ]
}

resultant_fpd <- rbind (not_to_edit_final, new_fpd)
resultant_fpd <- resultant_fpd[order (resultant_fpd$pos_id), ]

test_fpd <- NULL
test_fpd <- flatten_leaves (resultant_fpd)

comments_ids <- NULL
curr_new_line_ids <- NULL
next_line_ids <- NULL
correction_nodes <- NULL
correction_ids <- NULL
correction_nodes <- test_fpd[test_fpd$parent == 0 & is.na (test_fpd$next_spaces)
& is.na (test_fpd$next_lines) & is.na (test_fpd$prev_spaces), ]

for (i in correction_nodes$id){
correction_ids <- append (correction_ids, which (test_fpd$id == i, arr.ind = TRUE))
}

test_fpd[which (is.na (test_fpd$next_lines)), "next_lines"] <- 0
test_fpd[which (is.na (test_fpd$next_spaces)), "next_spaces"] <- 0
test_fpd[which (is.na (test_fpd$prev_spaces)), "prev_spaces"] <- 0

comments_ids <- which (test_fpd$parent < 0)
correction_ids <- append (correction_ids, comments_ids)
for (i in correction_ids){
if ( (i-1) > 0){
test_fpd[i-1, "next_lines"] <- 1
}else{
next
}
}
test_fpd <- test_fpd[! (test_fpd$parent == 0 & test_fpd$terminal == FALSE), ]
deletion_nodes <- NULL
for (i in final_exam_nodes_ids){
if (i %in% test_fpd$id){
deletion_nodes <- rbind (deletion_nodes, get_children (test_fpd, i))
}
}
next_line_ids <- as.double (test_fpd[test_fpd$id %in% deletion_nodes$id
& test_fpd$next_lines == 1, "pos_id"])
test_fpd <- test_fpd[! (test_fpd$id %in% deletion_nodes$id), ]
for (i in next_line_ids){
curr_new_line_ids <- append (curr_new_line_ids,
max (which (test_fpd$pos_id < i)))
}
for (i in curr_new_line_ids) {
test_fpd[i, ]$next_lines <- 1
}

test_fpd
}



63 changes: 63 additions & 0 deletions R/opt-dead-code.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,26 @@ dc_one_file <- function(text) {
# @param fpd A flatten parsed data data.frame.
#
dc_one_fpd <- function(fpd) {

Fun_def_ids <- fpd[fpd$token == "FUNCTION", "parent"]

#check if function bodies have never assigned vars
if(length(Fun_def_ids) > 0)
{
i <- 1
for (i in 1:length(Fun_def_ids)) {
Flag <- Is_Var_Assigned(fpd, Fun_def_ids[[i]])
if(Flag[1] == TRUE)
next
else {
warning("Please remove the unassigned variable or assign value. It may lead to errors.")
k <- 3
for(k in 3:(as.numeric(Flag[2])+1)){
print(Flag[k])
}
}
}
}
# first remove code that is after (and equally nested) next, break, or return
new_fpd <- remove_after_interruption(fpd)

Expand Down Expand Up @@ -288,3 +308,46 @@ unindent_fpd <- function(fpd, parent_spaces) {
fpd[fpd$id %in% new_line_ids, "prev_spaces"] - prnt_diff
fpd
}


Is_Var_Assigned <- function(fpd, id)
{
Fun_Ids <- sapply(id, function(act_id){
utils::tail(fpd$id[fpd$parent == act_id & fpd$token == "expr"], 1)
})

act_fpd <- get_children(fpd, Fun_Ids)


Checklist_expr <- NULL
Checklist <- NULL
Checklist_var <- act_fpd[act_fpd$token == "SYMBOL" & act_fpd$next_lines == 1 & act_fpd$parent == Fun_Ids, ]
expr_ids <- act_fpd[act_fpd$parent == Fun_Ids, ]$id
j <- 1
assignment_exprs <- c("LEFT_ASSIGN", "RIGHT_ASSIGN", "EQ_ASSIGN")
sys_call <- c("SYMBOL_FUNCTION_CALL")
for(j in seq_len(length(expr_ids))){
test_id <- expr_ids[j]
if(length(act_fpd[act_fpd$parent == test_id, ]$id) > 0)
{
if(!any(act_fpd[act_fpd$parent == test_id, ]$token %in% assignment_exprs) & !any(act_fpd[act_fpd$parent == test_id, ]$token %in% sys_call)){
Checklist_expr <- rbind(Checklist_expr, act_fpd[act_fpd$id == test_id, ])
}
}
}
Checklist <- rbind(Checklist_expr, Checklist_var)
Check_Flag <- NULL
if(length(Checklist$id) > 0)
{
itr <- NULL
Check_Flag <- FALSE
for(itr in 1:length(Checklist$id))
{
Check_Flag <- append(Check_Flag, sprintf("Function: %s Variable: %s", (fpd[fpd$parent == fpd[fpd$id == act_fpd[1, ]$parent, ]$parent & fpd$token == "SYMBOL", ]$text), (Checklist[itr, ]$text)))
}
Check_Flag <- append(Check_Flag, length(Check_Flag), 1)
}
else
Check_Flag <- TRUE
return (Check_Flag)
}
Loading