Skip to content

Commit

Permalink
Add RFC 2396
Browse files Browse the repository at this point in the history
This adds the ABNF from RFC 2396, the original
specification of the URI syntax from 1998.

Please note that RFC 2396 was superseded by
RFC 3986 in 2005, which added concepts such as
IPv6 address literals to the URI syntax specification.
  • Loading branch information
s-heppner committed Aug 15, 2024
1 parent 89434bc commit 3336f21
Show file tree
Hide file tree
Showing 4 changed files with 132 additions and 0 deletions.
1 change: 1 addition & 0 deletions AUTHORS
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
Marko Ristin ([email protected], [email protected], [email protected]) for Zurich University of Applied Sciences (ZHAW)
Oliver Steensen-Bech Haagh ([email protected]) for Digital Maritime Consultancy ApS
Sebastian Heppner ([email protected], [email protected]) for RWTH Aachen University
Empty file.
49 changes: 49 additions & 0 deletions test_data/nested-python/rfc2396/expected.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Regular expressions for the URI syntax of RFC 2396 ("Uniform Resource
# Identifiers (URI): Generic Syntax"), composed bottom-up with f-strings.
# Each name mirrors a grammar rule (lower-cased, hyphens replaced by
# underscores). A pattern must be assigned before any pattern that
# interpolates it, so the statement order below is significant.
#
# NOTE(review): the location of this file (next to a ``grammar.abnf``)
# suggests it is the expected translation of that grammar -- confirm, and
# keep the two files in sync whenever either one changes.

# --- Basic character classes ------------------------------------------------
alphanum = '[a-zA-Z0-9]'
# RFC 2396, section 2.3:
#   mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
# The underscore belongs in this class. Listing the hyphen twice instead of
# "-" and "_" made every URI containing "_" fail to match.
mark = "[\\-_.!~*'()]"
unreserved = f'({alphanum}|{mark})'
# NOTE: this name shadows the builtin ``hex``; it is kept because it mirrors
# the grammar rule and may be referenced by that name elsewhere.
# The a-f alternatives appear twice, presumably because the grammar lists
# both the upper- and the lower-case letters and each one is translated to a
# case-insensitive class. The duplication is redundant but harmless.
hex = (
    '([0-9]|[aA]|[bB]|[cC]|[dD]|[eE]|[fF]|[aA]|[bB]|[cC]|[dD]|[e'
    'E]|[fF])'
)
escaped = f'%{hex}{hex}'

# --- Path components --------------------------------------------------------
pchar = f'({unreserved}|{escaped}|[:@&=+$,])'
param = f'({pchar})*'
segment = f'({pchar})*(;{param})*'
path_segments = f'{segment}(/{segment})*'
abs_path = f'/{path_segments}'

# --- Scheme and authority ---------------------------------------------------
scheme = '[a-zA-Z][a-zA-Z0-9+\\-.]*'
userinfo = f'({unreserved}|{escaped}|[;:&=+$,])*'
domainlabel = f'({alphanum}|{alphanum}({alphanum}|-)*{alphanum})'
toplabel = f'([a-zA-Z]|[a-zA-Z]({alphanum}|-)*{alphanum})'
hostname = f'({domainlabel}\\.)*{toplabel}(\\.)?'
ipv4address = '[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+'
host = f'({hostname}|{ipv4address})'
port = '[0-9]*'
hostport = f'{host}(:{port})?'
# The whole server part is optional, so an empty authority is accepted.
server = f'(({userinfo}@)?{hostport})?'
reg_name = f'({unreserved}|{escaped}|[$,;:@&=+])+'
authority = f'({server}|{reg_name})'
net_path = f'//{authority}({abs_path})?'

# --- Query, hierarchical and opaque parts -----------------------------------
reserved = '[;/?:@&=+$,]'
uric = f'({reserved}|{unreserved}|{escaped})'
query = f'({uric})*'
hier_part = f'({net_path}|{abs_path})(\\?{query})?'
# Same as ``uric`` minus "/", so an opaque part can never start with a slash.
uric_no_slash = f'({unreserved}|{escaped}|[;?:@&=+$,])'
opaque_part = f'{uric_no_slash}({uric})*'
absoluteuri = f'{scheme}:({hier_part}|{opaque_part})'
fragment = f'({uric})*'

# NOTE(review): ``lowalpha`` (and ``upalpha`` further down) each match BOTH
# cases of every letter, whereas RFC 2396 defines them as lower-case only and
# upper-case only. No other pattern in this block interpolates them, so the
# URI patterns are unaffected; confirm before using them on their own.
lowalpha = (
    '([aA]|[bB]|[cC]|[dD]|[eE]|[fF]|[gG]|[hH]|[iI]|[jJ]|[k'
    'K]|[lL]|[mM]|[nN]|[oO]|[pP]|[qQ]|[rR]|[sS]|[tT]|[uU]|[v'
    'V]|[wW]|[xX]|[yY]|[zZ])'
)

# --- Relative references and the top-level rule -----------------------------
path = f'({abs_path}|{opaque_part})?'
# Unlike ``pchar``, a relative segment excludes ":" so that it cannot be
# mistaken for a scheme.
rel_segment = f'({unreserved}|{escaped}|[;@&=+$,])+'
rel_path = f'{rel_segment}({abs_path})?'
relativeuri = f'({net_path}|{abs_path}|{rel_path})(\\?{query})?'
upalpha = (
    '([aA]|[bB]|[cC]|[dD]|[eE]|[fF]|[gG]|[hH]|[iI]|[jJ]|[kK]|'
    '[lL]|[mM]|[nN]|[oO]|[pP]|[qQ]|[rR]|[sS]|[tT]|[uU]|[vV]|'
    '[wW]|[xX]|[yY]|[zZ])'
)
# Both halves are optional, so the empty string is a valid URI reference.
uri_reference = f'({absoluteuri}|{relativeuri})?(\\#{fragment})?'
82 changes: 82 additions & 0 deletions test_data/nested-python/rfc2396/grammar.abnf
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
; From: https://www.ietf.org/rfc/rfc2396.txt
; Note that RFC 3986 is a newer version of the URI standard
;
; The rules below follow the collected BNF of RFC 2396 (Appendix A),
; rewritten in ABNF: "/" separates alternatives, "*" and "1*" express
; repetition, and "[ ... ]" marks an optional element.
;
; NOTE(review): quoted strings in ABNF are case-insensitive (RFC 5234,
; section 2.3). As written, lowalpha and upalpha therefore each match both
; cases, and the upper- and lower-case alternatives of hex are redundant.
; Numeric ranges (e.g. %x61-7A, %x41-5A) would be needed to make these
; rules case-specific.

alpha = lowalpha / upalpha

lowalpha = "a" / "b" / "c" / "d" / "e" / "f" / "g" / "h" / "i" /
           "j" / "k" / "l" / "m" / "n" / "o" / "p" / "q" / "r" /
           "s" / "t" / "u" / "v" / "w" / "x" / "y" / "z"

upalpha = "A" / "B" / "C" / "D" / "E" / "F" / "G" / "H" / "I" /
          "J" / "K" / "L" / "M" / "N" / "O" / "P" / "Q" / "R" /
          "S" / "T" / "U" / "V" / "W" / "X" / "Y" / "Z"

digit = "0" / "1" / "2" / "3" / "4" / "5" / "6" / "7" /
        "8" / "9"

alphanum = alpha / digit

uric = reserved / unreserved / escaped

reserved = ";" / "/" / "?" / ":" / "@" / "&" / "=" / "+" /
           "$" / ","

unreserved = alphanum / mark

; RFC 2396, section 2.3: the second alternative is the underscore, not a
; second hyphen.
mark = "-" / "_" / "." / "!" / "~" / "*" / "'" / "(" / ")"

escaped = "%" hex hex

hex = digit / "A" / "B" / "C" / "D" / "E" / "F" /
      "a" / "b" / "c" / "d" / "e" / "f"

absoluteURI = scheme ":" ( hier-part / opaque-part )

hier-part = ( net-path / abs-path ) [ "?" query ]
net-path = "//" authority [ abs-path ]
abs-path = "/" path-segments

opaque-part = uric-no-slash *uric

; Same as uric without "/", so an opaque part cannot start with a slash.
uric-no-slash = unreserved / escaped / ";" / "?" / ":" / "@" /
                "&" / "=" / "+" / "$" / ","

scheme = alpha *( alpha / digit / "+" / "-" / "." )

authority = server / reg-name

reg-name = 1*( unreserved / escaped / "$" / "," /
               ";" / ":" / "@" / "&" / "=" / "+" )

server = [ [ userinfo "@" ] hostport ]

userinfo = *( unreserved / escaped /
              ";" / ":" / "&" / "=" / "+" / "$" / "," )

hostport = host [ ":" port ]
host = hostname / IPv4address
hostname = *( domainlabel "." ) toplabel [ "." ]
domainlabel = alphanum / alphanum *( alphanum / "-" ) alphanum
toplabel = alpha / alpha *( alphanum / "-" ) alphanum

IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit
port = *digit

path = [ abs-path / opaque-part ]
path-segments = segment *( "/" segment )
segment = *pchar *( ";" param )
param = *pchar
pchar = unreserved / escaped /
        ":" / "@" / "&" / "=" / "+" / "$" / ","

query = *uric

URI-reference = [ absoluteURI / relativeURI ] [ "#" fragment ]

fragment = *uric

relativeURI = ( net-path / abs-path / rel-path ) [ "?" query ]

rel-path = rel-segment [ abs-path ]
; Unlike pchar, a relative segment excludes ":" so that it cannot be
; mistaken for a scheme.
rel-segment = 1*( unreserved / escaped /
                  ";" / "@" / "&" / "=" / "+" / "$" / "," )

0 comments on commit 3336f21

Please sign in to comment.