Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use file-level expressions and avoid slow loop for whitespace_linter() for up to 50% speedup #2024

Merged
merged 8 commits into from
Jul 29, 2023
78 changes: 26 additions & 52 deletions R/make_linter_from_regex.R
Original file line number Diff line number Diff line change
@@ -1,73 +1,53 @@
make_linter_from_regex <- function(regex,
lint_type,
lint_msg,
ignore_strings = TRUE) {
# If a regex-based linter is found, only flag those lints that occur within
# a relevant section of source code
.in_ignorable_position <- function(source_expression, line_number, match) {
ignore_strings && in_string(source_expression, line_number, match)
}

lint_msg) {
function() {
Linter(function(source_expression) {
if (!is_lint_level(source_expression, "expression")) {
if (!is_lint_level(source_expression, "file")) {
return(list())
}

all_matches <- re_matches(
source_expression[["lines"]],
source_expression[["file_lines"]],
regex,
locations = TRUE,
global = TRUE
global = FALSE
)
all_matches <- all_matches[!is.na(all_matches$start), ]
all_matches$line_number <- as.integer(rownames(all_matches))

line_numbers <- as.integer(names(source_expression[["lines"]]))

lints <- Map(
function(line_matches, line_number) {
lapply(
split(line_matches, seq_len(nrow(line_matches))),
function(.match) {
if (
is.na(.match[["start"]]) ||
.in_ignorable_position(source_expression, line_number, .match)
) {
return()
}
start <- .match[["start"]]
end <- .match[["end"]]
Lint(
filename = source_expression[["filename"]],
line_number = line_number,
column_number = start,
type = lint_type,
message = lint_msg,
line = source_expression[["lines"]][[as.character(line_number)]],
ranges = list(c(start, end))
)
}
)
},
all_matches,
line_numbers
)
matches_by_row <- split(all_matches, seq_len(nrow(all_matches)))

Filter(function(x) any(lengths(x) > 0L), lints)
lints <- lapply(matches_by_row, function(.match) {
if (is_match_covered(.match, source_expression)) {
return()
}
Lint(
filename = source_expression[["filename"]],
line_number = .match$line_number,
type = lint_type,
message = lint_msg,
line = source_expression[["file_lines"]][[rownames(.match)]],
ranges = list(c(.match$start, .match$end))
)
})
lints[lengths(lints) > 0L]
})
}
}

#' Determine if a regex match is covered by an expression in a source_expression
#'
#' @param match The position where a regex match was observed.
#' match must have entries "start", "end", and "line_number".
#' @param source_expression A source_expression
#' @param line_number,match The position where a regex match was observed.
#' match must have entries "start" and "end".
#' @param token_type Restrict analysis to tokens of this type, for example,
#' with token_type = "STR_CONST" you can check that a regex match occurs
#' within a string
#' @noRd
is_match_covered <- function(source_expression, line_number, match, token_type = NULL) {
pc <- source_expression[["parsed_content"]]
is_match_covered <- function(match, source_expression, token_type = "STR_CONST") {
line_number <- match$line_number
pc <- source_expression[["full_parsed_content"]]
if (!is.null(token_type)) {
pc <- pc[pc[["token"]] == token_type, ]
}
Expand All @@ -92,9 +72,3 @@ is_match_covered <- function(source_expression, line_number, match, token_type =

any_single_line_covers() || any_multi_line_covers()
}

in_string <- function(source_expression, line_number, match) {
# do any of the strings in the parsed content contain the matched regex?

is_match_covered(source_expression, line_number, match, "STR_CONST")
}
Loading