From 76951540454b97f6e98665af82b917bcdc8d99ce Mon Sep 17 00:00:00 2001 From: "opensearch-trigger-bot[bot]" <98922864+opensearch-trigger-bot[bot]@users.noreply.github.com> Date: Fri, 12 Apr 2024 15:12:29 -0700 Subject: [PATCH] Refactoring globMatch using simpleMatchWithNormalizedStrings from Regex (#13104) (#13176) * Refactoring globMatch using simpleMatchWithNormalizedStrings from Regex * Adding entry to CHANGELOG.md * Adding tests for GlobMatch * Moving entry to Changed section in CHANGELOG.md --------- (cherry picked from commit f2e2a85e948d486d9041b9642c85ebe4614d7bb4) Signed-off-by: Niyati Aggarwal Signed-off-by: github-actions[bot] Co-authored-by: github-actions[bot] --- CHANGELOG.md | 1 + .../main/java/org/opensearch/common/Glob.java | 53 ++++++++------- .../org/opensearch/common/regex/Regex.java | 35 +--------- .../java/org/opensearch/common/GlobTests.java | 67 +++++++++++++++++++ 4 files changed, 97 insertions(+), 59 deletions(-) create mode 100644 server/src/test/java/org/opensearch/common/GlobTests.java diff --git a/CHANGELOG.md b/CHANGELOG.md index 34330cbaedc7c..1e0a12f31ed87 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,6 +32,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - [BWC and API enforcement] Enforcing the presence of API annotations at build time ([#12872](https://github.com/opensearch-project/OpenSearch/pull/12872)) - Improve built-in secure transports support ([#12907](https://github.com/opensearch-project/OpenSearch/pull/12907)) - Update links to documentation in rest-api-spec ([#13043](https://github.com/opensearch-project/OpenSearch/pull/13043)) +- Refactoring globMatch using simpleMatchWithNormalizedStrings from Regex ([#13104](https://github.com/opensearch-project/OpenSearch/pull/13104)) ### Deprecated diff --git a/libs/common/src/main/java/org/opensearch/common/Glob.java b/libs/common/src/main/java/org/opensearch/common/Glob.java index daf045dd49e3a..b390a3ca84182 100644 --- a/libs/common/src/main/java/org/opensearch/common/Glob.java +++ b/libs/common/src/main/java/org/opensearch/common/Glob.java @@ -52,34 +52,35 @@ public static boolean globMatch(String pattern, String str) { if (pattern == null || str == null) { return false; } - int firstIndex = pattern.indexOf('*'); - if (firstIndex == -1) { - return pattern.equals(str); - } - if (firstIndex == 0) { - if (pattern.length() == 1) { - return true; - } - int nextIndex = pattern.indexOf('*', firstIndex + 1); - if (nextIndex == -1) { - return str.endsWith(pattern.substring(1)); - } else if (nextIndex == 1) { - // Double wildcard "**" - skipping the first "*" - return globMatch(pattern.substring(1), str); + int sIdx = 0, pIdx = 0, match = 0, wildcardIdx = -1; + while (sIdx < str.length()) { + // both chars matching, incrementing both pointers + if (pIdx < pattern.length() && str.charAt(sIdx) == pattern.charAt(pIdx)) { + sIdx++; + pIdx++; + } else if (pIdx < pattern.length() && pattern.charAt(pIdx) == '*') { + // wildcard found, only incrementing pattern pointer + wildcardIdx = pIdx; + match = sIdx; + pIdx++; + } else if (wildcardIdx != -1) { + // last pattern pointer was a wildcard, incrementing string pointer + pIdx = wildcardIdx + 1; + match++; + sIdx = match; + } else { + // current pattern pointer is not a wildcard, last pattern pointer was also not a wildcard + // characters do not match + return false; } - String part = pattern.substring(1, nextIndex); - int partIndex = str.indexOf(part); - while (partIndex != -1) { - if (globMatch(pattern.substring(nextIndex), str.substring(partIndex + part.length()))) { - return true; - } - partIndex = str.indexOf(part, partIndex + 1); - } - return false; } - return (str.length() >= firstIndex - && pattern.substring(0, firstIndex).equals(str.substring(0, firstIndex)) - && globMatch(pattern.substring(firstIndex), str.substring(firstIndex))); + + // check for remaining characters in pattern + while (pIdx < pattern.length() && pattern.charAt(pIdx) == '*') { + pIdx++; + } + + return pIdx == pattern.length(); } } diff --git a/server/src/main/java/org/opensearch/common/regex/Regex.java b/server/src/main/java/org/opensearch/common/regex/Regex.java index 323b460af62df..6d8b5c3585c4c 100644 --- a/server/src/main/java/org/opensearch/common/regex/Regex.java +++ b/server/src/main/java/org/opensearch/common/regex/Regex.java @@ -35,6 +35,7 @@ import org.apache.lucene.util.automaton.Automata; import org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.Operations; +import org.opensearch.common.Glob; import org.opensearch.core.common.Strings; import java.util.ArrayList; @@ -125,39 +126,7 @@ public static boolean simpleMatch(String pattern, String str, boolean caseInsens pattern = Strings.toLowercaseAscii(pattern); str = Strings.toLowercaseAscii(str); } - return simpleMatchWithNormalizedStrings(pattern, str); - } - - private static boolean simpleMatchWithNormalizedStrings(String pattern, String str) { - int sIdx = 0, pIdx = 0, match = 0, wildcardIdx = -1; - while (sIdx < str.length()) { - // both chars matching, incrementing both pointers - if (pIdx < pattern.length() && str.charAt(sIdx) == pattern.charAt(pIdx)) { - sIdx++; - pIdx++; - } else if (pIdx < pattern.length() && pattern.charAt(pIdx) == '*') { - // wildcard found, only incrementing pattern pointer - wildcardIdx = pIdx; - match = sIdx; - pIdx++; - } else if (wildcardIdx != -1) { - // last pattern pointer was a wildcard, incrementing string pointer - pIdx = wildcardIdx + 1; - match++; - sIdx = match; - } else { - // current pattern pointer is not a wildcard, last pattern pointer was also not a wildcard - // characters do not match - return false; - } - } - - // check for remaining characters in pattern - while (pIdx < pattern.length() && pattern.charAt(pIdx) == '*') { - pIdx++; - } - - return pIdx == pattern.length(); + return Glob.globMatch(pattern, str); } /** diff --git a/server/src/test/java/org/opensearch/common/GlobTests.java b/server/src/test/java/org/opensearch/common/GlobTests.java new file mode 100644 index 0000000000000..2bbe157be43cc --- /dev/null +++ b/server/src/test/java/org/opensearch/common/GlobTests.java @@ -0,0 +1,67 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.common; + +import org.opensearch.test.OpenSearchTestCase; + +public class GlobTests extends OpenSearchTestCase { + + public void testGlobMatchForNull() { + assertFalse(Glob.globMatch(null, "test")); + assertFalse(Glob.globMatch("test", null)); + assertFalse(Glob.globMatch(null, null)); + } + + public void testGlobMatchNoWildcard() { + assertTrue(Glob.globMatch("abcd", "abcd")); + assertFalse(Glob.globMatch("abcd", "foobar")); + } + + public void testGlobMatchSingleWildcard() { + assertTrue(Glob.globMatch("*foo", "barfoo")); + assertFalse(Glob.globMatch("*foo", "foobar")); + assertTrue(Glob.globMatch("foo*", "foobarfoo")); + assertFalse(Glob.globMatch("foo*", "barfoobar")); + assertTrue(Glob.globMatch("foo*bar", "foobarnfoosbar")); + } + + public void testGlobMatchMultipleWildcards() { + assertTrue(Glob.globMatch("*foo*", "barfoobar")); + assertFalse(Glob.globMatch("*foo*", "baroofbar")); + assertTrue(Glob.globMatch("*foo*bar", "abcdfooefghbar")); + assertFalse(Glob.globMatch("*foo*bar", "foonotbars")); + } + + public void testGlobalMatchDoubleWildcard() { + assertTrue(Glob.globMatch("**foo", "barbarfoo")); + assertFalse(Glob.globMatch("**foo", "barbarfoowoof")); + assertTrue(Glob.globMatch("**bar**", "foobarfoo")); + assertFalse(Glob.globMatch("**bar**", "foobanfoo")); + } + + public void testGlobMatchMultipleCharactersWithSingleWildcard() { + assertTrue(Glob.globMatch("a*b", "acb")); + assertTrue(Glob.globMatch("f*oo", "foo")); + assertTrue(Glob.globMatch("a*b", "aab")); + assertTrue(Glob.globMatch("a*b", "aaab")); + } + + public void testGlobMatchWildcardWithEmptyString() { + assertTrue(Glob.globMatch("*", "")); + assertTrue(Glob.globMatch("a*", "a")); + assertFalse(Glob.globMatch("a*", "")); + } + + public void testGlobMatchMultipleWildcardsWithMultipleCharacters() { + assertTrue(Glob.globMatch("a*b*c", "abc")); + assertTrue(Glob.globMatch("a*b*c", "axxxbxbc")); + assertFalse(Glob.globMatch("a*b*c", "abca")); + assertFalse(Glob.globMatch("a*b*c", "ac")); + } +}