Skip to content

Commit

Permalink
Change syntax for :[args+] to handle multiple node matching
Browse files Browse the repository at this point in the history
  • Loading branch information
danieltrt committed Jun 27, 2024
1 parent 86c7ac4 commit f4177a7
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 42 deletions.
19 changes: 2 additions & 17 deletions plugins/spark_upgrade/calculator_signature_change.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from execute_piranha import ExecutePiranha

from polyglot_piranha import (
Rule, Filter
Rule,
)


Expand All @@ -40,11 +40,6 @@ def get_rules(self) -> List[Rule]:
replace_node="*",
replace="EntropyCalculator(:[stats], :[stats].sum.toLong)",
holes={"entropy_calculator"},
filters= {
Filter( # avoids infinite loop
not_contains=[("rgx .sum.toLong")],
),
}
)

# Rule to transform GiniCalculator() arguments
Expand All @@ -54,11 +49,6 @@ def get_rules(self) -> List[Rule]:
replace_node="*",
replace="GiniCalculator(:[stats], :[stats].sum.toLong)",
holes={"gini_calculator"},
filters= {
Filter( # avoids infinite loop
not_contains=[("rgx .sum.toLong")],
),
}
)

transform_VarianceCalculator_args = Rule(
Expand All @@ -67,11 +57,6 @@ def get_rules(self) -> List[Rule]:
replace_node="*",
replace="VarianceCalculator(:[stats], :[stats].sum.toLong)",
holes={"variance_calculator"},
filters= {
Filter( # avoids infinite loop
not_contains=[("rgx .sum.toLong")],
),
}
)
return [
transform_VarianceCalculator_args,
Expand All @@ -80,4 +65,4 @@ def get_rules(self) -> List[Rule]:
]

def summaries_to_custom_dict(self, _) -> Dict[str, Any]:
return {}
return {}
45 changes: 26 additions & 19 deletions src/models/concrete_syntax.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ use crate::models::matches::Match;
// Precompile the regex outside the function
lazy_static! {
static ref RE_VAR: Regex = Regex::new(r"^:\[(?P<var_name>\w+)\]").unwrap();
static ref RE_VAR_PLUS: Regex = Regex::new(r"^:\[(?P<var_name>\w+)\+\]").unwrap();
}

// Struct to avoid dealing with lifetimes
Expand All @@ -33,6 +34,7 @@ pub struct CapturedNode {
range: Range,
text: String,
}

#[derive(Clone, PartialEq, Eq)]
struct MatchResult {
mapping: HashMap<String, CapturedNode>,
Expand Down Expand Up @@ -196,9 +198,12 @@ pub(crate) fn get_matches_for_subsequence_of_nodes(
node = cursor.node();
}

if let Some(caps) = RE_VAR.captures(match_template) {
if let Some(caps) = RE_VAR_PLUS.captures(match_template) {
// If template starts with a one-or-more template variable (:[var+])
handle_template_variable_matching(cursor, source_code, top_node, caps, match_template, true)
} else if let Some(caps) = RE_VAR.captures(match_template) {
// If template starts with a template variable
handle_template_variable_matching(cursor, source_code, top_node, caps, match_template)
handle_template_variable_matching(cursor, source_code, top_node, caps, match_template, false)
} else if node.child_count() == 0 {
// If the current node is a leaf
return handle_leaf_node(cursor, source_code, match_template, top_node);
Expand All @@ -209,12 +214,17 @@ pub(crate) fn get_matches_for_subsequence_of_nodes(
}
}

/// This function is a bit convoluted because I have failed to simplify it further.
/// It basically matches a template variable against a subsequence of nodes.
/// Comments inline explaining what's going on during the matching process
/// This function does the template variable matching against entire tree nodes.
/// Keep in mind that it will only attempt to match the template variables against nodes
/// at either the current level of the traversal, or its children. It can also operate on
/// single node templates :[args], and multiple nodes templates :[args+].

/// For successful matches, it returns the assignment of each template variable against a
/// particular range. The Option<usize> indicates whether a match was successful, and keeps
/// track of the last sibling node that was matched (with respect to the match_sequential_siblings function)
fn handle_template_variable_matching(
cursor: &mut TreeCursor, source_code: &[u8], top_node: &Node, caps: regex::Captures,
match_template: &str,
match_template: &str, one_plus: bool,
) -> (HashMap<String, CapturedNode>, Option<usize>) {
let var_name = &caps["var_name"];
let cs_adv_len = caps[0].len();
Expand All @@ -240,6 +250,7 @@ fn handle_template_variable_matching(
let mut is_final_sibling = false;
loop {
let mut tmp_cursor = next_node_cursor.clone();

if let (mut recursive_matches, Some(last_matched_node_idx)) =
get_matches_for_subsequence_of_nodes(
&mut tmp_cursor,
Expand Down Expand Up @@ -277,6 +288,7 @@ fn handle_template_variable_matching(

// Append an extra node to match with :[var]. Remember we had advanced next_node_cursor before,
// therefore we cannot advance it again, otherwise we would skip nodes.
// We only attempt to append an extra node if we are in one_plus matching mode.
last_node = next_node_cursor.node();
if is_final_sibling {
break;
Expand All @@ -290,6 +302,10 @@ fn handle_template_variable_matching(
if is_final_sibling {
should_match = find_next_sibling(&mut next_node_cursor);
}

if !one_plus {
break;
}
}

// Move one level down, to attempt to match the template variable :[var] against smaller nodes.
Expand Down Expand Up @@ -328,24 +344,15 @@ fn handle_leaf_node(
(HashMap::new(), None)
}

/// Finds the last matched node's index in the parent node's children.
///
/// This function determines whether we finished our matching at a top-level child of the parent
/// node. If so, it returns the index of that child.
///
/// # Arguments
///
/// * `cursor` - A mutable reference to a `TreeCursor`.
/// * `parent_node` - A reference to the parent `Node`.
///
/// # Returns
/// Finds the index of the last matched node relative to the `match_sequential_siblings` function.
///
/// * `Option<usize>` - The index of the matched child node, or `None` if no match is found.
/// This function checks if the matching concluded on a child of the node where `match_sequential_siblings`
/// was invoked. If so, it returns the index of that child.
fn find_last_matched_node(cursor: &mut TreeCursor, parent_node: &Node) -> Option<usize> {
parent_node
.children(&mut parent_node.walk())
.enumerate()
.filter(|&(i, child)| child == cursor.node())
.filter(|&(_i, child)| child == cursor.node())
.map(|(i, _child)| i - 1)
.next()
}
Expand Down
12 changes: 6 additions & 6 deletions src/models/unit_tests/concrete_syntax_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,9 @@ fn test_no_match() {
fn test_trailing_comma() {
run_test(
"a.foo(x, // something about the first argument
y, // something about the second argumet
y, // something about the second argument
);",
":[var].foo(:[arg1], :[arg2])",
":[var].foo(:[arg1], :[arg2+])",
2,
vec![vec![("var", "a"), ("arg1", "x"), ("arg2", "y,")]],
GO,
Expand All @@ -92,7 +92,7 @@ fn test_trailing_comma() {
fn test_sequential_siblings_matching() {
run_test(
"a.foo(x, y, z);",
":[var].foo(:[arg1], z)",
":[var].foo(:[arg1+], z)",
2,
vec![vec![("var", "a"), ("arg1", "x, y")]],
GO,
Expand All @@ -117,7 +117,7 @@ fn test_sequential_siblings_stmts2() {
// Find all usages of foo, whose last element is z.
run_test(
"x.foo(1,2,3,4);",
":[var].foo(:[args]);",
":[var].foo(:[args+]);",
2,
vec![vec![("var", "x"), ("args", "1,2,3,4")]],
JAVA,
Expand All @@ -138,8 +138,8 @@ fn test_complex_template() {
}}",
"int :[var] = 0;
while(:[var] < 100) {
:[body]
:[var]++;
:[body+]
:[var] ++;
}",
1,
vec![vec![
Expand Down

0 comments on commit f4177a7

Please sign in to comment.