Skip to content

Commit

Permalink
Use file-level expressions and avoid slow loop for whitespace_linter(…
Browse files Browse the repository at this point in the history
…) for up to 50% speedup (#2024)

* Use file-level expressions for whitespace_linter() for up to 50% speedup

* use full-source names everywhere

* better tailor make_linter_from_regex to the whitespace case

* trailing ws

* eliminate 'ignore_strings' option
  • Loading branch information
MichaelChirico authored Jul 29, 2023
1 parent 1b459fc commit c3c10bb
Showing 1 changed file with 26 additions and 52 deletions.
78 changes: 26 additions & 52 deletions R/make_linter_from_regex.R
Original file line number Diff line number Diff line change
@@ -1,73 +1,53 @@
make_linter_from_regex <- function(regex,
lint_type,
lint_msg,
ignore_strings = TRUE) {
# If a regex-based linter is found, only flag those lints that occur within
# a relevant section of source code
.in_ignorable_position <- function(source_expression, line_number, match) {
ignore_strings && in_string(source_expression, line_number, match)
}

lint_msg) {
function() {
Linter(function(source_expression) {
if (!is_lint_level(source_expression, "expression")) {
if (!is_lint_level(source_expression, "file")) {
return(list())
}

all_matches <- re_matches(
source_expression[["lines"]],
source_expression[["file_lines"]],
regex,
locations = TRUE,
global = TRUE
global = FALSE
)
all_matches <- all_matches[!is.na(all_matches$start), ]
all_matches$line_number <- as.integer(rownames(all_matches))

line_numbers <- as.integer(names(source_expression[["lines"]]))

lints <- Map(
function(line_matches, line_number) {
lapply(
split(line_matches, seq_len(nrow(line_matches))),
function(.match) {
if (
is.na(.match[["start"]]) ||
.in_ignorable_position(source_expression, line_number, .match)
) {
return()
}
start <- .match[["start"]]
end <- .match[["end"]]
Lint(
filename = source_expression[["filename"]],
line_number = line_number,
column_number = start,
type = lint_type,
message = lint_msg,
line = source_expression[["lines"]][[as.character(line_number)]],
ranges = list(c(start, end))
)
}
)
},
all_matches,
line_numbers
)
matches_by_row <- split(all_matches, seq_len(nrow(all_matches)))

Filter(function(x) any(lengths(x) > 0L), lints)
lints <- lapply(matches_by_row, function(.match) {
if (is_match_covered(.match, source_expression)) {
return()
}
Lint(
filename = source_expression[["filename"]],
line_number = .match$line_number,
type = lint_type,
message = lint_msg,
line = source_expression[["file_lines"]][[rownames(.match)]],
ranges = list(c(.match$start, .match$end))
)
})
lints[lengths(lints) > 0L]
})
}
}

#' Determine if a regex match is covered by an expression in a source_expression
#'
#' @param match The position where a regex match was observed.
#' match must have entries "start", "end", and "line_number".
#' @param source_expression A source_expression
#' @param line_number,match The position where a regex match was observed.
#' match must have entries "start" and "end".
#' @param token_type Restrict analysis to tokens of this type, for example,
#' with token_type = "STR_CONST" you can check that a regex match occurs
#' within a string
#' @noRd
is_match_covered <- function(source_expression, line_number, match, token_type = NULL) {
pc <- source_expression[["parsed_content"]]
is_match_covered <- function(match, source_expression, token_type = "STR_CONST") {
line_number <- match$line_number
pc <- source_expression[["full_parsed_content"]]
if (!is.null(token_type)) {
pc <- pc[pc[["token"]] == token_type, ]
}
Expand All @@ -92,9 +72,3 @@ is_match_covered <- function(source_expression, line_number, match, token_type =

any_single_line_covers() || any_multi_line_covers()
}

in_string <- function(source_expression, line_number, match) {
# do any of the strings in the parsed content contain the matched regex?

is_match_covered(source_expression, line_number, match, "STR_CONST")
}

0 comments on commit c3c10bb

Please sign in to comment.