From f4177a7cb1d4a820ea4fa3518f43f2db5fdb7980 Mon Sep 17 00:00:00 2001 From: Daniel Ramos Date: Thu, 27 Jun 2024 07:15:19 +0200 Subject: [PATCH] Change syntax for :[args+] to handle multiple node matching --- .../calculator_signature_change.py | 19 +------- src/models/concrete_syntax.rs | 45 +++++++++++-------- src/models/unit_tests/concrete_syntax_test.rs | 12 ++--- 3 files changed, 34 insertions(+), 42 deletions(-) diff --git a/plugins/spark_upgrade/calculator_signature_change.py b/plugins/spark_upgrade/calculator_signature_change.py index 9eddac349..223f41ad8 100644 --- a/plugins/spark_upgrade/calculator_signature_change.py +++ b/plugins/spark_upgrade/calculator_signature_change.py @@ -13,7 +13,7 @@ from execute_piranha import ExecutePiranha from polyglot_piranha import ( - Rule, Filter + Rule, ) @@ -40,11 +40,6 @@ def get_rules(self) -> List[Rule]: replace_node="*", replace="EntropyCalculator(:[stats], :[stats].sum.toLong)", holes={"entropy_calculator"}, - filters= { - Filter( # avoids infinite loop - not_contains=[("rgx .sum.toLong")], - ), - } ) # Rule to transform GiniCalculator() arguments @@ -54,11 +49,6 @@ def get_rules(self) -> List[Rule]: replace_node="*", replace="GiniCalculator(:[stats], :[stats].sum.toLong)", holes={"gini_calculator"}, - filters= { - Filter( # avoids infinite loop - not_contains=[("rgx .sum.toLong")], - ), - } ) transform_VarianceCalculator_args = Rule( @@ -67,11 +57,6 @@ def get_rules(self) -> List[Rule]: replace_node="*", replace="VarianceCalculator(:[stats], :[stats].sum.toLong)", holes={"variance_calculator"}, - filters= { - Filter( # avoids infinite loop - not_contains=[("rgx .sum.toLong")], - ), - } ) return [ transform_VarianceCalculator_args, @@ -80,4 +65,4 @@ def get_rules(self) -> List[Rule]: ] def summaries_to_custom_dict(self, _) -> Dict[str, Any]: - return {} + return {} \ No newline at end of file diff --git a/src/models/concrete_syntax.rs b/src/models/concrete_syntax.rs index bc440e9af..1b1ecb870 100644 --- 
a/src/models/concrete_syntax.rs +++ b/src/models/concrete_syntax.rs @@ -25,6 +25,7 @@ use crate::models::matches::Match; // Precompile the regex outside the function lazy_static! { static ref RE_VAR: Regex = Regex::new(r"^:\[(?P\w+)\]").unwrap(); + static ref RE_VAR_PLUS: Regex = Regex::new(r"^:\[(?P\w+)\+\]").unwrap(); } // Struct to avoid dealing with lifetimes @@ -33,6 +34,7 @@ pub struct CapturedNode { range: Range, text: String, } + #[derive(Clone, PartialEq, Eq)] struct MatchResult { mapping: HashMap, @@ -196,9 +198,12 @@ pub(crate) fn get_matches_for_subsequence_of_nodes( node = cursor.node(); } - if let Some(caps) = RE_VAR.captures(match_template) { + if let Some(caps) = RE_VAR_PLUS.captures(match_template) { + // If template starts with a one-or-more template variable, i.e. :[var+] + handle_template_variable_matching(cursor, source_code, top_node, caps, match_template, true) + } else if let Some(caps) = RE_VAR.captures(match_template) { // If template starts with a template variable - handle_template_variable_matching(cursor, source_code, top_node, caps, match_template) + handle_template_variable_matching(cursor, source_code, top_node, caps, match_template, false) } else if node.child_count() == 0 { // If the current node if a leaf return handle_leaf_node(cursor, source_code, match_template, top_node); @@ -209,12 +214,17 @@ pub(crate) fn get_matches_for_subsequence_of_nodes( } } -/// This function is a bit convoluted because I have failed to simplify it further. -/// It basically matches a template variable against a subsequence of nodes. -/// Comments inline explaining what's going on during the matching process +/// This function does the template variable matching against entire tree nodes. +/// Keep in mind that it will only attempt to match the template variables against nodes +/// at either the current level of the traversal, or its children. It can also operate on +/// single node templates :[args], and multiple nodes templates :[args+]. 
+/// +/// For successful matches, it returns the assignment of each template variable against a +/// particular range. The Option indicates whether a match was successful, and keeps +/// track of the last sibling node that was matched (with respect to the match_sequential_siblings function) fn handle_template_variable_matching( cursor: &mut TreeCursor, source_code: &[u8], top_node: &Node, caps: regex::Captures, - match_template: &str, + match_template: &str, one_plus: bool, ) -> (HashMap, Option) { let var_name = &caps["var_name"]; let cs_adv_len = caps[0].len(); @@ -240,6 +250,7 @@ fn handle_template_variable_matching( let mut is_final_sibling = false; loop { let mut tmp_cursor = next_node_cursor.clone(); + if let (mut recursive_matches, Some(last_matched_node_idx)) = get_matches_for_subsequence_of_nodes( &mut tmp_cursor, @@ -277,6 +288,7 @@ fn handle_template_variable_matching( // Append an extra node to match with :[var]. Remember we had advanced next_node_cursor before, // therefore we cannot advance it again, otherwise we would skip nodes. + // We only attempt to append an extra node if we are in one_plus matching mode. last_node = next_node_cursor.node(); if is_final_sibling { break; @@ -290,6 +302,10 @@ fn handle_template_variable_matching( if is_final_sibling { should_match = find_next_sibling(&mut next_node_cursor); } + + if !one_plus { + break; + } } // Move one level down, to attempt to match the template variable :[var] against smaller nodes. @@ -328,24 +344,15 @@ fn handle_leaf_node( (HashMap::new(), None) } -/// Finds the last matched node's index in the parent node's children. -/// -/// This function determines whether we finished our matching at a top-level child of the parent -/// node. If so, it returns the index of that child. -/// -/// # Arguments -/// -/// * `cursor` - A mutable reference to a `TreeCursor`. -/// * `parent_node` - A reference to the parent `Node`. 
-/// -/// # Returns +/// Finds the index of the last matched node relative to the `match_sequential_siblings` function. /// -/// * `Option` - The index of the matched child node, or `None` if no match is found. +/// This function checks if the matching concluded on a child of the node where `match_sequential_siblings` +/// was invoked. If so, it returns the index of that child. fn find_last_matched_node(cursor: &mut TreeCursor, parent_node: &Node) -> Option { parent_node .children(&mut parent_node.walk()) .enumerate() - .filter(|&(i, child)| child == cursor.node()) + .filter(|&(_i, child)| child == cursor.node()) .map(|(i, _child)| i - 1) .next() } diff --git a/src/models/unit_tests/concrete_syntax_test.rs b/src/models/unit_tests/concrete_syntax_test.rs index d221e5bfc..2d607f5fc 100644 --- a/src/models/unit_tests/concrete_syntax_test.rs +++ b/src/models/unit_tests/concrete_syntax_test.rs @@ -79,9 +79,9 @@ fn test_no_match() { fn test_trailing_comma() { run_test( "a.foo(x, // something about the first argument - y, // something about the second argumet + y, // something about the second argument );", - ":[var].foo(:[arg1], :[arg2])", + ":[var].foo(:[arg1], :[arg2+])", 2, vec![vec![("var", "a"), ("arg1", "x"), ("arg2", "y,")]], GO, @@ -92,7 +92,7 @@ fn test_trailing_comma() { fn test_sequential_siblings_matching() { run_test( "a.foo(x, y, z);", - ":[var].foo(:[arg1], z)", + ":[var].foo(:[arg1+], z)", 2, vec![vec![("var", "a"), ("arg1", "x, y")]], GO, @@ -117,7 +117,7 @@ fn test_sequential_siblings_stmts2() { // Find all usages of foo, whose last element is z. run_test( "x.foo(1,2,3,4);", - ":[var].foo(:[args]);", + ":[var].foo(:[args+]);", 2, vec![vec![("var", "x"), ("args", "1,2,3,4")]], JAVA, @@ -138,8 +138,8 @@ fn test_complex_template() { }}", "int :[var] = 0; while(:[var] < 100) { - :[body] - :[var]++; + :[body+] + :[var] ++; }", 1, vec![vec![