From a73b25eae3b567e4b65b2585014099151f67b967 Mon Sep 17 00:00:00 2001 From: Zach McCormick Date: Thu, 15 Feb 2024 14:58:22 -0500 Subject: [PATCH] Add support in regular expressions for UTF-8 whitespace detection --- lib/liquid.rb | 4 ++-- lib/liquid/block_body.rb | 8 ++++---- lib/liquid/expression.rb | 2 +- lib/liquid/lexer.rb | 4 ++-- lib/liquid/tags/assign.rb | 2 +- lib/liquid/tags/case.rb | 2 +- lib/liquid/tags/cycle.rb | 4 ++-- lib/liquid/tags/for.rb | 2 +- lib/liquid/tags/if.rb | 4 ++-- lib/liquid/tags/include.rb | 2 +- lib/liquid/tags/inline_comment.rb | 2 +- lib/liquid/tags/raw.rb | 2 +- lib/liquid/tags/render.rb | 2 +- lib/liquid/tags/table_row.rb | 2 +- lib/liquid/variable.rb | 6 +++--- performance/shopify/paginate.rb | 2 +- test/integration/tags/include_tag_test.rb | 2 +- test/integration/whitespace_test.rb | 15 +++++++++++++++ 18 files changed, 41 insertions(+), 26 deletions(-) create mode 100644 test/integration/whitespace_test.rb diff --git a/lib/liquid.rb b/lib/liquid.rb index eba84e140..b015187da 100644 --- a/lib/liquid.rb +++ b/lib/liquid.rb @@ -36,8 +36,8 @@ module Liquid VariableEnd = /\}\}/ VariableIncompleteEnd = /\}\}?/ QuotedString = /"[^"]*"|'[^']*'/ - QuotedFragment = /#{QuotedString}|(?:[^\s,\|'"]|#{QuotedString})+/o - TagAttributes = /(\w[\w-]*)\s*\:\s*(#{QuotedFragment})/o + QuotedFragment = /#{QuotedString}|(?:[^[[:space:]],\|'"]|#{QuotedString})+/o + TagAttributes = /(\w[\w-]*)[[:space:]]*\:[[:space:]]*(#{QuotedFragment})/o AnyStartingTag = /#{TagStart}|#{VariableStart}/o PartialTemplateParser = /#{TagStart}.*?#{TagEnd}|#{VariableStart}.*?#{VariableIncompleteEnd}/om TemplateParser = /(#{PartialTemplateParser}|#{AnyStartingTag})/om diff --git a/lib/liquid/block_body.rb b/lib/liquid/block_body.rb index 61096de80..3a050019f 100644 --- a/lib/liquid/block_body.rb +++ b/lib/liquid/block_body.rb @@ -4,11 +4,11 @@ module Liquid class BlockBody - LiquidTagToken = /\A\s*(#{TagName})\s*(.*?)\z/o - FullToken = /\A#{TagStart}#{WhitespaceControl}?(\s*)(#{TagName})(\s*)(.*?)#{WhitespaceControl}?#{TagEnd}\z/om - FullTokenPossiblyInvalid = /\A(.*)#{TagStart}#{WhitespaceControl}?\s*(\w+)\s*(.*)?#{WhitespaceControl}?#{TagEnd}\z/om + LiquidTagToken = /\A[[:space:]]*(#{TagName})[[:space:]]*(.*?)\z/o + FullToken = /\A#{TagStart}#{WhitespaceControl}?([[:space:]]*)(#{TagName})([[:space:]]*)(.*?)#{WhitespaceControl}?#{TagEnd}\z/om + FullTokenPossiblyInvalid = /\A(.*)#{TagStart}#{WhitespaceControl}?[[:space:]]*(\w+)[[:space:]]*(.*)?#{WhitespaceControl}?#{TagEnd}\z/om ContentOfVariable = /\A#{VariableStart}#{WhitespaceControl}?(.*?)#{WhitespaceControl}?#{VariableEnd}\z/om - WhitespaceOrNothing = /\A\s*\z/ + WhitespaceOrNothing = /\A[[:space:]]*\z/ TAGSTART = "{%" VARSTART = "{{" diff --git a/lib/liquid/expression.rb b/lib/liquid/expression.rb index a1426732c..3583659b4 100644 --- a/lib/liquid/expression.rb +++ b/lib/liquid/expression.rb @@ -18,7 +18,7 @@ class Expression # Use an atomic group (?>...) to avoid pathological backtracing from # malicious input as described in https://github.com/Shopify/liquid/issues/1357 - RANGES_REGEX = /\A\(\s*(?>(\S+)\s*\.\.)\s*(\S+)\s*\)\z/ + RANGES_REGEX = /\A\([[:space:]]*(?>(\S+)[[:space:]]*\.\.)[[:space:]]*(\S+)[[:space:]]*\)\z/ def self.parse(markup) return nil unless markup diff --git a/lib/liquid/lexer.rb b/lib/liquid/lexer.rb index 4ce2bc7b9..f048d659a 100644 --- a/lib/liquid/lexer.rb +++ b/lib/liquid/lexer.rb @@ -21,8 +21,8 @@ class Lexer STRING_LITERAL = Regexp.union(SINGLE_STRING_LITERAL, DOUBLE_STRING_LITERAL) NUMBER_LITERAL = /-?\d+(\.\d+)?/ DOTDOT = /\.\./ - COMPARISON_OPERATOR = /==|!=|<>|<=?|>=?|contains(?=\s)/ - WHITESPACE_OR_NOTHING = /\s*/ + COMPARISON_OPERATOR = /==|!=|<>|<=?|>=?|contains(?=[[:space:]])/ + WHITESPACE_OR_NOTHING = /[[:space:]]*/ def initialize(input) @ss = StringScanner.new(input) diff --git a/lib/liquid/tags/assign.rb b/lib/liquid/tags/assign.rb index 9eff9796e..91c67e9d5 100644 --- a/lib/liquid/tags/assign.rb +++ b/lib/liquid/tags/assign.rb @@ -14,7 +14,7 @@ module Liquid # @liquid_syntax_keyword variable_name The name of the variable being created. # @liquid_syntax_keyword value The value you want to assign to the variable. class Assign < Tag - Syntax = /(#{VariableSignature}+)\s*=\s*(.*)\s*/om + Syntax = /(#{VariableSignature}+)[[:space:]]*=[[:space:]]*(.*)[[:space:]]*/om # @api private def self.raise_syntax_error(parse_context) diff --git a/lib/liquid/tags/case.rb b/lib/liquid/tags/case.rb index 4fe4cd98c..11b00163a 100644 --- a/lib/liquid/tags/case.rb +++ b/lib/liquid/tags/case.rb @@ -24,7 +24,7 @@ module Liquid # @liquid_syntax_keyword third_expression An expression to be rendered when the variable's value has no match. class Case < Block Syntax = /(#{QuotedFragment})/o - WhenSyntax = /(#{QuotedFragment})(?:(?:\s+or\s+|\s*\,\s*)(#{QuotedFragment}.*))?/om + WhenSyntax = /(#{QuotedFragment})(?:(?:[[:space:]]+or[[:space:]]+|[[:space:]]*\,[[:space:]]*)(#{QuotedFragment}.*))?/om attr_reader :blocks, :left diff --git a/lib/liquid/tags/cycle.rb b/lib/liquid/tags/cycle.rb index 00695265d..76cd9c9be 100644 --- a/lib/liquid/tags/cycle.rb +++ b/lib/liquid/tags/cycle.rb @@ -16,7 +16,7 @@ module Liquid # {% cycle string, string, ... %} class Cycle < Tag SimpleSyntax = /\A#{QuotedFragment}+/o - NamedSyntax = /\A(#{QuotedFragment})\s*\:\s*(.*)/om + NamedSyntax = /\A(#{QuotedFragment})[[:space:]]*\:[[:space:]]*(.*)/om attr_reader :variables @@ -61,7 +61,7 @@ def render_to_output_buffer(context, output) def variables_from_string(markup) markup.split(',').collect do |var| - var =~ /\s*(#{QuotedFragment})\s*/o + var =~ /[[:space:]]*(#{QuotedFragment})[[:space:]]*/o Regexp.last_match(1) ? parse_expression(Regexp.last_match(1)) : nil end.compact end diff --git a/lib/liquid/tags/for.rb b/lib/liquid/tags/for.rb index 9205fa42f..c0b1746b9 100644 --- a/lib/liquid/tags/for.rb +++ b/lib/liquid/tags/for.rb @@ -25,7 +25,7 @@ module Liquid # @liquid_optional_param range [untyped] A custom numeric range to iterate over. # @liquid_optional_param reversed [untyped] Iterate in reverse order. class For < Block - Syntax = /\A(#{VariableSegment}+)\s+in\s+(#{QuotedFragment}+)\s*(reversed)?/o + Syntax = /\A(#{VariableSegment}+)[[:space:]]+in[[:space:]]+(#{QuotedFragment}+)[[:space:]]*(reversed)?/o attr_reader :collection_name, :variable_name, :limit, :from diff --git a/lib/liquid/tags/if.rb b/lib/liquid/tags/if.rb index a5cc84697..60b26c984 100644 --- a/lib/liquid/tags/if.rb +++ b/lib/liquid/tags/if.rb @@ -14,8 +14,8 @@ module Liquid # @liquid_syntax_keyword condition The condition to evaluate. # @liquid_syntax_keyword expression The expression to render if the condition is met. class If < Block - Syntax = /(#{QuotedFragment})\s*([=!<>a-z_]+)?\s*(#{QuotedFragment})?/o - ExpressionsAndOperators = /(?:\b(?:\s?and\s?|\s?or\s?)\b|(?:\s*(?!\b(?:\s?and\s?|\s?or\s?)\b)(?:#{QuotedFragment}|\S+)\s*)+)/o + Syntax = /(#{QuotedFragment})[[:space:]]*([=!<>a-z_]+)?[[:space:]]*(#{QuotedFragment})?/o + ExpressionsAndOperators = /(?:\b(?:[[:space:]]?and[[:space:]]?|[[:space:]]?or[[:space:]]?)\b|(?:[[:space:]]*(?!\b(?:[[:space:]]?and[[:space:]]?|[[:space:]]?or[[:space:]]?)\b)(?:#{QuotedFragment}|\S+)[[:space:]]*)+)/o BOOLEAN_OPERATORS = %w(and or).freeze attr_reader :blocks diff --git a/lib/liquid/tags/include.rb b/lib/liquid/tags/include.rb index 7b9685eeb..df78ba79a 100644 --- a/lib/liquid/tags/include.rb +++ b/lib/liquid/tags/include.rb @@ -20,7 +20,7 @@ module Liquid class Include < Tag prepend Tag::Disableable - SYNTAX = /(#{QuotedFragment}+)(\s+(?:with|for)\s+(#{QuotedFragment}+))?(\s+(?:as)\s+(#{VariableSegment}+))?/o + SYNTAX = /(#{QuotedFragment}+)([[:space:]]+(?:with|for)[[:space:]]+(#{QuotedFragment}+))?([[:space:]]+(?:as)[[:space:]]+(#{VariableSegment}+))?/o Syntax = SYNTAX attr_reader :template_name_expr, :variable_name_expr, :attributes diff --git a/lib/liquid/tags/inline_comment.rb b/lib/liquid/tags/inline_comment.rb index 493cfddc7..d50df27c0 100644 --- a/lib/liquid/tags/inline_comment.rb +++ b/lib/liquid/tags/inline_comment.rb @@ -12,7 +12,7 @@ def initialize(tag_name, markup, options) # # As such, we're forcing users to put a # symbol on every line otherwise this # tag will throw an error. - if markup.match?(/\n\s*[^#\s]/) + if markup.match?(/\n[[:space:]]*[^#[[:space:]]]/) raise SyntaxError, options[:locale].t("errors.syntax.inline_comment_invalid") end end diff --git a/lib/liquid/tags/raw.rb b/lib/liquid/tags/raw.rb index 02ee2b3cb..33bfd8c06 100644 --- a/lib/liquid/tags/raw.rb +++ b/lib/liquid/tags/raw.rb @@ -13,7 +13,7 @@ module Liquid # {% endraw %} # @liquid_syntax_keyword expression The expression to be output without being rendered. class Raw < Block - Syntax = /\A\s*\z/ + Syntax = /\A[[:space:]]*\z/ def initialize(tag_name, markup, parse_context) super diff --git a/lib/liquid/tags/render.rb b/lib/liquid/tags/render.rb index b9ae58ea0..59cf7a802 100644 --- a/lib/liquid/tags/render.rb +++ b/lib/liquid/tags/render.rb @@ -27,7 +27,7 @@ module Liquid # @liquid_syntax_keyword filename The name of the snippet to render, without the `.liquid` extension. class Render < Tag FOR = 'for' - SYNTAX = /(#{QuotedString}+)(\s+(with|#{FOR})\s+(#{QuotedFragment}+))?(\s+(?:as)\s+(#{VariableSegment}+))?/o + SYNTAX = /(#{QuotedString}+)([[:space:]]+(with|#{FOR})[[:space:]]+(#{QuotedFragment}+))?([[:space:]]+(?:as)[[:space:]]+(#{VariableSegment}+))?/o disable_tags "include" diff --git a/lib/liquid/tags/table_row.rb b/lib/liquid/tags/table_row.rb index be9a64c44..a1c0d948a 100644 --- a/lib/liquid/tags/table_row.rb +++ b/lib/liquid/tags/table_row.rb @@ -24,7 +24,7 @@ module Liquid # @liquid_optional_param offset [number] The 1-based index to start iterating at. # @liquid_optional_param range [untyped] A custom numeric range to iterate over. class TableRow < Block - Syntax = /(\w+)\s+in\s+(#{QuotedFragment}+)/o + Syntax = /(\w+)[[:space:]]+in[[:space:]]+(#{QuotedFragment}+)/o attr_reader :variable_name, :collection_name, :attributes diff --git a/lib/liquid/variable.rb b/lib/liquid/variable.rb index 1a4c07198..bbd6c7761 100644 --- a/lib/liquid/variable.rb +++ b/lib/liquid/variable.rb @@ -12,9 +12,9 @@ module Liquid # {{ user | link }} # class Variable - FilterMarkupRegex = /#{FilterSeparator}\s*(.*)/om - FilterParser = /(?:\s+|#{QuotedFragment}|#{ArgumentSeparator})+/o - FilterArgsRegex = /(?:#{FilterArgumentSeparator}|#{ArgumentSeparator})\s*((?:\w+\s*\:\s*)?#{QuotedFragment})/o + FilterMarkupRegex = /#{FilterSeparator}[[:space:]]*(.*)/om + FilterParser = /(?:[[:space:]]+|#{QuotedFragment}|#{ArgumentSeparator})+/o + FilterArgsRegex = /(?:#{FilterArgumentSeparator}|#{ArgumentSeparator})[[:space:]]*((?:\w+[[:space:]]*\:[[:space:]]*)?#{QuotedFragment})/o JustTagAttributes = /\A#{TagAttributes}\z/o MarkupWithQuotedFragment = /(#{QuotedFragment})(.*)/om diff --git a/performance/shopify/paginate.rb b/performance/shopify/paginate.rb index 3154d1838..0e78ed1c5 100644 --- a/performance/shopify/paginate.rb +++ b/performance/shopify/paginate.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true class Paginate < Liquid::Block - Syntax = /(#{Liquid::QuotedFragment})\s*(by\s*(\d+))?/ + Syntax = /(#{Liquid::QuotedFragment})[[:space:]]*(by[[:space:]]*(\d+))?/ def initialize(tag_name, markup, options) super diff --git a/test/integration/tags/include_tag_test.rb b/test/integration/tags/include_tag_test.rb index b86d40eee..9407fad0d 100644 --- a/test/integration/tags/include_tag_test.rb +++ b/test/integration/tags/include_tag_test.rb @@ -29,7 +29,7 @@ def read_template_file(_template_path) end class CustomInclude < Liquid::Tag - Syntax = /(#{Liquid::QuotedFragment}+)(\s+(?:with|for)\s+(#{Liquid::QuotedFragment}+))?/o + Syntax = /(#{Liquid::QuotedFragment}+)([[:space:]]+(?:with|for)[[:space:]]+(#{Liquid::QuotedFragment}+))?/o def initialize(tag_name, markup, tokens) markup =~ Syntax diff --git a/test/integration/whitespace_test.rb b/test/integration/whitespace_test.rb new file mode 100644 index 000000000..e4669fd86 --- /dev/null +++ b/test/integration/whitespace_test.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +require 'test_helper' +require 'timeout' + +class WhitespaceTest < Minitest::Test + include Liquid + + + def test_if_0xa0_utf8_whitespace_parses_correctly + utf8_0xa0 = "\u00A0" + assert_template_result('one', "{% if foo ==#{utf8_0xa0}1 %}one{% endif %}", { 'foo' => IntegerDrop.new('1') }) + end + +end