From 3336f214398262df1f5317070d3b2699b43958da Mon Sep 17 00:00:00 2001 From: s-heppner Date: Thu, 15 Aug 2024 15:23:42 +0200 Subject: [PATCH 1/2] Add RFC 2396 This adds the ABNF from RFC 2396, the original specification of the URI syntax from 1998. Please note that RFC 2396 was superseded by RFC 3986 in 2005, which introduced concepts like IPv6 to the URI syntax specification. --- AUTHORS | 1 + test_data/nested-python/rfc2396/expected.err | 0 test_data/nested-python/rfc2396/expected.py | 49 ++++++++++++ test_data/nested-python/rfc2396/grammar.abnf | 82 ++++++++++++++++++++ 4 files changed, 132 insertions(+) create mode 100644 test_data/nested-python/rfc2396/expected.err create mode 100644 test_data/nested-python/rfc2396/expected.py create mode 100644 test_data/nested-python/rfc2396/grammar.abnf diff --git a/AUTHORS b/AUTHORS index f9f5386..9dcf846 100644 --- a/AUTHORS +++ b/AUTHORS @@ -1,2 +1,3 @@ Marko Ristin (marko@ristin.ch, marko.ristin@gmail.com, rist@zhaw.ch) for Zurich University of Applied Sciences (ZHAW) Oliver Steensen-Bech Haagh (oliver@dmc.international) for Digital Maritime Consultancy ApS +Sebastian Heppner (s.heppner@iat.rwth-aachen.de, mail@s-heppner.com) for RWTH Aachen University diff --git a/test_data/nested-python/rfc2396/expected.err b/test_data/nested-python/rfc2396/expected.err new file mode 100644 index 0000000..e69de29 diff --git a/test_data/nested-python/rfc2396/expected.py b/test_data/nested-python/rfc2396/expected.py new file mode 100644 index 0000000..63a6209 --- /dev/null +++ b/test_data/nested-python/rfc2396/expected.py @@ -0,0 +1,49 @@ +alphanum = '[a-zA-Z0-9]' +mark = "[\\-_.!~*'()]" +unreserved = f'({alphanum}|{mark})' +hex = ( + '([0-9]|[aA]|[bB]|[cC]|[dD]|[eE]|[fF]|[aA]|[bB]|[cC]|[dD]|[e' + 'E]|[fF])' +) +escaped = f'%{hex}{hex}' +pchar = f'({unreserved}|{escaped}|[:@&=+$,])' +param = f'({pchar})*' +segment = f'({pchar})*(;{param})*' +path_segments = f'{segment}(/{segment})*' +abs_path = f'/{path_segments}' +scheme = 
'[a-zA-Z][a-zA-Z0-9+\\-.]*' +userinfo = f'({unreserved}|{escaped}|[;:&=+$,])*' +domainlabel = f'({alphanum}|{alphanum}({alphanum}|-)*{alphanum})' +toplabel = f'([a-zA-Z]|[a-zA-Z]({alphanum}|-)*{alphanum})' +hostname = f'({domainlabel}\\.)*{toplabel}(\\.)?' +ipv4address = '[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+' +host = f'({hostname}|{ipv4address})' +port = '[0-9]*' +hostport = f'{host}(:{port})?' +server = f'(({userinfo}@)?{hostport})?' +reg_name = f'({unreserved}|{escaped}|[$,;:@&=+])+' +authority = f'({server}|{reg_name})' +net_path = f'//{authority}({abs_path})?' +reserved = '[;/?:@&=+$,]' +uric = f'({reserved}|{unreserved}|{escaped})' +query = f'({uric})*' +hier_part = f'({net_path}|{abs_path})(\\?{query})?' +uric_no_slash = f'({unreserved}|{escaped}|[;?:@&=+$,])' +opaque_part = f'{uric_no_slash}({uric})*' +absoluteuri = f'{scheme}:({hier_part}|{opaque_part})' +fragment = f'({uric})*' +lowalpha = ( + '([aA]|[bB]|[cC]|[dD]|[eE]|[fF]|[gG]|[hH]|[iI]|[jJ]|[k' + 'K]|[lL]|[mM]|[nN]|[oO]|[pP]|[qQ]|[rR]|[sS]|[tT]|[uU]|[v' + 'V]|[wW]|[xX]|[yY]|[zZ])' +) +path = f'({abs_path}|{opaque_part})?' +rel_segment = f'({unreserved}|{escaped}|[;@&=+$,])+' +rel_path = f'{rel_segment}({abs_path})?' +relativeuri = f'({net_path}|{abs_path}|{rel_path})(\\?{query})?' +upalpha = ( + '([aA]|[bB]|[cC]|[dD]|[eE]|[fF]|[gG]|[hH]|[iI]|[jJ]|[kK]|' + '[lL]|[mM]|[nN]|[oO]|[pP]|[qQ]|[rR]|[sS]|[tT]|[uU]|[vV]|' + '[wW]|[xX]|[yY]|[zZ])' +) +uri_reference = f'({absoluteuri}|{relativeuri})?(\\#{fragment})?' 
diff --git a/test_data/nested-python/rfc2396/grammar.abnf b/test_data/nested-python/rfc2396/grammar.abnf new file mode 100644 index 0000000..f25f71e --- /dev/null +++ b/test_data/nested-python/rfc2396/grammar.abnf @@ -0,0 +1,82 @@ +; From: https://www.ietf.org/rfc/rfc2396.txt +; Note that RFC 3986 is a newer version of the URI standard + +alpha = lowalpha / upalpha + +lowalpha = "a" / "b" / "c" / "d" / "e" / "f" / "g" / "h" / "i" / + "j" / "k" / "l" / "m" / "n" / "o" / "p" / "q" / "r" / + "s" / "t" / "u" / "v" / "w" / "x" / "y" / "z" + +upalpha = "A" / "B" / "C" / "D" / "E" / "F" / "G" / "H" / "I" / + "J" / "K" / "L" / "M" / "N" / "O" / "P" / "Q" / "R" / + "S" / "T" / "U" / "V" / "W" / "X" / "Y" / "Z" + +digit = "0" / "1" / "2" / "3" / "4" / "5" / "6" / "7" / + "8" / "9" + +alphanum = alpha / digit + +uric = reserved / unreserved / escaped + +reserved = ";" / "/" / "?" / ":" / "@" / "&" / "=" / "+" / + "$" / "," + +unreserved = alphanum / mark + +mark = "-" / "_" / "." / "!" / "~" / "*" / "'" / "(" / ")" + +escaped = "%" hex hex + +hex = digit / "A" / "B" / "C" / "D" / "E" / "F" / + "a" / "b" / "c" / "d" / "e" / "f" + +absoluteURI = scheme ":" ( hier-part / opaque-part ) + +hier-part = ( net-path / abs-path ) [ "?" query ] +net-path = "//" authority [ abs-path ] +abs-path = "/" path-segments + +opaque-part = uric-no-slash *uric + +uric-no-slash = unreserved / escaped / ";" / "?" / ":" / "@" / + "&" / "=" / "+" / "$" / "," + +scheme = alpha *( alpha / digit / "+" / "-" / "." ) + +authority = server / reg-name + +reg-name = 1*( unreserved / escaped / "$" / "," / + ";" / ":" / "@" / "&" / "=" / "+" ) + +server = [ [ userinfo "@" ] hostport ] + +userinfo = *( unreserved / escaped / + ";" / ":" / "&" / "=" / "+" / "$" / "," ) + +hostport = host [ ":" port ] +host = hostname / IPv4address +hostname = *( domainlabel "." ) toplabel [ "." 
] +domainlabel = alphanum / alphanum *( alphanum / "-" ) alphanum +toplabel = alpha / alpha *( alphanum / "-" ) alphanum + +IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit +port = *digit + +path = [ abs-path / opaque-part ] +path-segments = segment *( "/" segment ) +segment = *pchar *( ";" param ) +param = *pchar +pchar = unreserved / escaped / + ":" / "@" / "&" / "=" / "+" / "$" / "," + +query = *uric + +URI-reference = [ absoluteURI / relativeURI ] [ "#" fragment ] + +fragment = *uric + +relativeURI = ( net-path / abs-path / rel-path ) [ "?" query ] + +rel-path = rel-segment [ abs-path ] +rel-segment = 1*( unreserved / escaped / + ";" / "@" / "&" / "=" / "+" / "$" / "," ) From e2e9594808b1d9c9d49cca2ce1f24b0fc6274e6b Mon Sep 17 00:00:00 2001 From: Marko Ristin Date: Fri, 16 Aug 2024 07:57:59 +0500 Subject: [PATCH 2/2] Update test_data/nested-python/rfc2396/grammar.abnf --- test_data/nested-python/rfc2396/grammar.abnf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test_data/nested-python/rfc2396/grammar.abnf b/test_data/nested-python/rfc2396/grammar.abnf index f25f71e..9eda490 100644 --- a/test_data/nested-python/rfc2396/grammar.abnf +++ b/test_data/nested-python/rfc2396/grammar.abnf @@ -1,5 +1,6 @@ ; From: https://www.ietf.org/rfc/rfc2396.txt -; Note that RFC 3986 is a newer version of the URI standard +; Note that RFC 3986 is a newer version of the URI standard. + alpha = lowalpha / upalpha