Skip to content

Commit

Permalink
Updating Flanker library to support read-only filesystems and integra…
Browse files Browse the repository at this point in the history
…te upstream changes. Fixes #31
  • Loading branch information
jordan-wright committed Jun 11, 2019
1 parent bfd8a90 commit 4de4bbc
Show file tree
Hide file tree
Showing 18 changed files with 851 additions and 150 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -66,3 +66,6 @@ isthislegit-extension.zip
# Any dev manifest files
manifest-dev.json
manifest-prod.json

# Any offline developer data
data/
2 changes: 2 additions & 0 deletions dashboard/lib/flanker/CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,5 @@ The following changes were made:

* All occurrences of the `regex` library were replaced by the standard `re` library to make the library compatible with Google App Engine.
* The reliance on `cchardet` has been removed and replaced by `chardet`.
* Added `debug=False` to all instantiations of the `yacc.yacc` parser to support read-only filesystems.
* Added `write_tables=False` to all instantiations of the `yacc.yacc` parser to support read-only filesystems.
Empty file.
90 changes: 90 additions & 0 deletions dashboard/lib/flanker/addresslib/_parser/addr_spec_parsetab.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@

# addr_spec_parsetab.py
# This file is automatically generated. Do not edit.
_tabversion = '3.10'

_lr_method = 'LALR'

_lr_signature = 'addr_specFWSP AT DOT COMMA SEMICOLON LANGLE RANGLE ATOM DOT_ATOM LBRACKET RBRACKET DTEXT DQUOTE QTEXT QPAIR LPAREN RPAREN CTEXT URLmailbox_or_url_list : mailbox_or_url_list delim mailbox_or_url\n | mailbox_or_url_list delim\n | mailbox_or_urldelim : delim fwsp COMMA\n | delim fwsp SEMICOLON\n | COMMA\n | SEMICOLONmailbox_or_url : mailbox\n | urlurl : ofwsp URL ofwspmailbox : addr_spec\n | angle_addr\n | name_addrname_addr : ofwsp phrase angle_addrangle_addr : ofwsp LANGLE addr_spec RANGLE ofwspaddr_spec : ofwsp local_part AT domain ofwsplocal_part : DOT_ATOM\n | ATOM\n | quoted_stringdomain : DOT_ATOM\n | ATOM\n | domain_literalquoted_string : DQUOTE quoted_string_text DQUOTE\n | DQUOTE DQUOTEquoted_string_text : quoted_string_text QTEXT\n | quoted_string_text QPAIR\n | quoted_string_text fwsp\n | QTEXT\n | QPAIR\n | fwspdomain_literal : LBRACKET domain_literal_text RBRACKET\n | LBRACKET RBRACKETdomain_literal_text : domain_literal_text DTEXT\n | domain_literal_text fwsp\n | DTEXT\n | fwspcomment : LPAREN comment_text RPAREN\n | LPAREN RPARENcomment_text : comment_text CTEXT\n | comment_text fwsp\n | CTEXT\n | fwspphrase : phrase fwsp ATOM\n | phrase fwsp DOT_ATOM\n | phrase fwsp DOT\n | phrase fwsp quoted_string\n | phrase ATOM\n | phrase DOT_ATOM\n | phrase DOT\n | phrase quoted_string\n | ATOM\n | DOT_ATOM\n | DOT\n | quoted_stringofwsp : fwsp comment fwsp\n | fwsp comment\n | comment fwsp\n | comment\n | fwsp\n |fwsp : FWSP'

_lr_action_items = {'FWSP':([0,2,4,6,7,10,14,15,16,17,20,21,22,24,25,26,27,28,29,30,31,32,33,34,35,38,39,40,41,42,43,44,],[4,4,-61,4,4,4,-42,4,-38,-41,-30,-29,-28,4,-40,-37,-39,4,4,-20,-21,-22,-27,-26,-25,-36,-35,4,-32,-34,-33,-31,]),'QPAIR':([4,10,20,21,22,24,33,34,35,],[-61,21,-30,-29,-28,34,-27,-26,-25,]),'QTEXT':([4,10,20,21,22,24,33,34,35,],[-61,22,-30,-29,-28,35,-27,-26,-25,]),'DTEXT':([4,29,38,39,40,42,43,],[-61,39,-36,-35,43,-34,-33,]),'DQUOTE':([0,1,2,3,4,7,8,10,16,18,20,21,22,24,26,33,34,35,],[-60,-59,-58,10,-61,-56,-57,23,-38,-55,-30,-29,-28,36,-37,-27,-26,-25,]),'LBRACKET':([19,],[29,]),'DOT_ATOM':([0,1,2,3,4,7,8,16,18,19,26,],[-60,-59,-58,11,-61,-56,-57,-38,-55,30,-37,]),'AT':([9,11,12,13,23,36,],[19,-17,-18,-19,-24,-23,]),'LPAREN':([0,1,4,28,30,31,32,41,44,],[6,6,-61,6,-20,-21,-22,-32,-31,]),'ATOM':([0,1,2,3,4,7,8,16,18,19,26,],[-60,-59,-58,12,-61,-56,-57,-38,-55,31,-37,]),'RPAREN':([4,6,14,15,17,25,27,],[-61,16,-42,26,-41,-40,-39,]),'RBRACKET':([4,29,38,39,40,42,43,],[-61,41,-36,-35,44,-34,-33,]),'CTEXT':([4,6,14,15,17,25,27,],[-61,17,-42,27,-41,-40,-39,]),'$end':([1,2,4,5,7,8,16,18,26,28,30,31,32,37,41,44,],[-59,-58,-61,0,-56,-57,-38,-55,-37,-60,-20,-21,-22,-16,-32,-31,]),}

# Expand the compact action table into a nested lookup:
#   _lr_action[state][token_name] -> action number.
# Each _lr_action_items entry maps a token name to a pair of parallel lists
# (states, actions), which are zipped back together here.
_lr_action = {}
for _k, _v in _lr_action_items.items():
    for _x, _y in zip(_v[0], _v[1]):
        if _x not in _lr_action:
            _lr_action[_x] = {}
        _lr_action[_x][_k] = _y
# The compact form is no longer needed once the nested table is built.
del _lr_action_items

_lr_goto_items = {'fwsp':([0,2,6,7,10,15,24,28,29,40,],[1,8,14,18,20,25,33,1,38,42,]),'comment':([0,1,28,],[2,7,2,]),'domain':([19,],[28,]),'comment_text':([6,],[15,]),'ofwsp':([0,28,],[3,37,]),'local_part':([3,],[9,]),'quoted_string_text':([10,],[24,]),'domain_literal_text':([29,],[40,]),'addr_spec':([0,],[5,]),'quoted_string':([3,],[13,]),'domain_literal':([19,],[32,]),}

# Expand the compact goto table into a nested lookup:
#   _lr_goto[state][nonterminal_name] -> target state.
# Each _lr_goto_items entry maps a nonterminal to a pair of parallel lists
# (states, targets), which are zipped back together here.
_lr_goto = {}
for _k, _v in _lr_goto_items.items():
    for _x, _y in zip(_v[0], _v[1]):
        if _x not in _lr_goto:
            _lr_goto[_x] = {}
        _lr_goto[_x][_k] = _y
# The compact form is no longer needed once the nested table is built.
del _lr_goto_items
_lr_productions = [
("S' -> addr_spec","S'",1,None,None,None),
('mailbox_or_url_list -> mailbox_or_url_list delim mailbox_or_url','mailbox_or_url_list',3,'p_expression_mailbox_or_url_list','parser.py',19),
('mailbox_or_url_list -> mailbox_or_url_list delim','mailbox_or_url_list',2,'p_expression_mailbox_or_url_list','parser.py',20),
('mailbox_or_url_list -> mailbox_or_url','mailbox_or_url_list',1,'p_expression_mailbox_or_url_list','parser.py',21),
('delim -> delim fwsp COMMA','delim',3,'p_delim','parser.py',30),
('delim -> delim fwsp SEMICOLON','delim',3,'p_delim','parser.py',31),
('delim -> COMMA','delim',1,'p_delim','parser.py',32),
('delim -> SEMICOLON','delim',1,'p_delim','parser.py',33),
('mailbox_or_url -> mailbox','mailbox_or_url',1,'p_expression_mailbox_or_url','parser.py',36),
('mailbox_or_url -> url','mailbox_or_url',1,'p_expression_mailbox_or_url','parser.py',37),
('url -> ofwsp URL ofwsp','url',3,'p_expression_url','parser.py',41),
('mailbox -> addr_spec','mailbox',1,'p_expression_mailbox','parser.py',45),
('mailbox -> angle_addr','mailbox',1,'p_expression_mailbox','parser.py',46),
('mailbox -> name_addr','mailbox',1,'p_expression_mailbox','parser.py',47),
('name_addr -> ofwsp phrase angle_addr','name_addr',3,'p_expression_name_addr','parser.py',51),
('angle_addr -> ofwsp LANGLE addr_spec RANGLE ofwsp','angle_addr',5,'p_expression_angle_addr','parser.py',55),
('addr_spec -> ofwsp local_part AT domain ofwsp','addr_spec',5,'p_expression_addr_spec','parser.py',59),
('local_part -> DOT_ATOM','local_part',1,'p_expression_local_part','parser.py',63),
('local_part -> ATOM','local_part',1,'p_expression_local_part','parser.py',64),
('local_part -> quoted_string','local_part',1,'p_expression_local_part','parser.py',65),
('domain -> DOT_ATOM','domain',1,'p_expression_domain','parser.py',69),
('domain -> ATOM','domain',1,'p_expression_domain','parser.py',70),
('domain -> domain_literal','domain',1,'p_expression_domain','parser.py',71),
('quoted_string -> DQUOTE quoted_string_text DQUOTE','quoted_string',3,'p_expression_quoted_string','parser.py',75),
('quoted_string -> DQUOTE DQUOTE','quoted_string',2,'p_expression_quoted_string','parser.py',76),
('quoted_string_text -> quoted_string_text QTEXT','quoted_string_text',2,'p_expression_quoted_string_text','parser.py',83),
('quoted_string_text -> quoted_string_text QPAIR','quoted_string_text',2,'p_expression_quoted_string_text','parser.py',84),
('quoted_string_text -> quoted_string_text fwsp','quoted_string_text',2,'p_expression_quoted_string_text','parser.py',85),
('quoted_string_text -> QTEXT','quoted_string_text',1,'p_expression_quoted_string_text','parser.py',86),
('quoted_string_text -> QPAIR','quoted_string_text',1,'p_expression_quoted_string_text','parser.py',87),
('quoted_string_text -> fwsp','quoted_string_text',1,'p_expression_quoted_string_text','parser.py',88),
('domain_literal -> LBRACKET domain_literal_text RBRACKET','domain_literal',3,'p_expression_domain_literal','parser.py',92),
('domain_literal -> LBRACKET RBRACKET','domain_literal',2,'p_expression_domain_literal','parser.py',93),
('domain_literal_text -> domain_literal_text DTEXT','domain_literal_text',2,'p_expression_domain_literal_text','parser.py',100),
('domain_literal_text -> domain_literal_text fwsp','domain_literal_text',2,'p_expression_domain_literal_text','parser.py',101),
('domain_literal_text -> DTEXT','domain_literal_text',1,'p_expression_domain_literal_text','parser.py',102),
('domain_literal_text -> fwsp','domain_literal_text',1,'p_expression_domain_literal_text','parser.py',103),
('comment -> LPAREN comment_text RPAREN','comment',3,'p_expression_comment','parser.py',107),
('comment -> LPAREN RPAREN','comment',2,'p_expression_comment','parser.py',108),
('comment_text -> comment_text CTEXT','comment_text',2,'p_expression_comment_text','parser.py',112),
('comment_text -> comment_text fwsp','comment_text',2,'p_expression_comment_text','parser.py',113),
('comment_text -> CTEXT','comment_text',1,'p_expression_comment_text','parser.py',114),
('comment_text -> fwsp','comment_text',1,'p_expression_comment_text','parser.py',115),
('phrase -> phrase fwsp ATOM','phrase',3,'p_expression_phrase','parser.py',119),
('phrase -> phrase fwsp DOT_ATOM','phrase',3,'p_expression_phrase','parser.py',120),
('phrase -> phrase fwsp DOT','phrase',3,'p_expression_phrase','parser.py',121),
('phrase -> phrase fwsp quoted_string','phrase',3,'p_expression_phrase','parser.py',122),
('phrase -> phrase ATOM','phrase',2,'p_expression_phrase','parser.py',123),
('phrase -> phrase DOT_ATOM','phrase',2,'p_expression_phrase','parser.py',124),
('phrase -> phrase DOT','phrase',2,'p_expression_phrase','parser.py',125),
('phrase -> phrase quoted_string','phrase',2,'p_expression_phrase','parser.py',126),
('phrase -> ATOM','phrase',1,'p_expression_phrase','parser.py',127),
('phrase -> DOT_ATOM','phrase',1,'p_expression_phrase','parser.py',128),
('phrase -> DOT','phrase',1,'p_expression_phrase','parser.py',129),
('phrase -> quoted_string','phrase',1,'p_expression_phrase','parser.py',130),
('ofwsp -> fwsp comment fwsp','ofwsp',3,'p_expression_ofwsp','parser.py',139),
('ofwsp -> fwsp comment','ofwsp',2,'p_expression_ofwsp','parser.py',140),
('ofwsp -> comment fwsp','ofwsp',2,'p_expression_ofwsp','parser.py',141),
('ofwsp -> comment','ofwsp',1,'p_expression_ofwsp','parser.py',142),
('ofwsp -> fwsp','ofwsp',1,'p_expression_ofwsp','parser.py',143),
('ofwsp -> <empty>','ofwsp',0,'p_expression_ofwsp','parser.py',144),
('fwsp -> FWSP','fwsp',1,'p_expression_fwsp','parser.py',148),
]
190 changes: 190 additions & 0 deletions dashboard/lib/flanker/addresslib/_parser/lexer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
import ply.lex as lex
import logging

import six

# Module-level logger. It is used by the t_*_error handlers and is also
# handed to lex.lex() as its error log when the lexer is built.
log = logging.getLogger(__name__)
log.setLevel(logging.INFO)

# Extra lexer states, each declared 'exclusive'. The token functions in this
# module enter them with t.lexer.begin(...) on '[', '"' and '(' and return to
# 'INITIAL' on the matching closing character.
states = (
('domain', 'exclusive'),
('quote', 'exclusive'),
('comment', 'exclusive'),
)

# Token names declared for PLY; the t_* rules in this module produce them.
tokens = (
'FWSP',
'AT',
'DOT',
'COMMA',
'SEMICOLON',
'LANGLE',
'RANGLE',
'ATOM',
'DOT_ATOM',
'LBRACKET',
'RBRACKET',
'DTEXT',
'DQUOTE',
'QTEXT',
'QPAIR',
'LPAREN',
'RPAREN',
'CTEXT',
'URL'
)

# Urls - Not a part of the message format RFC but we permit these currently

# Token rule for http/https URLs. The raw string that opens the function body
# is the token's regular expression (PLY reads it from the function's
# docstring), so it must stay the first statement and must not be reworded.
# The match stops at whitespace and at any of: < > { } | ^ ~ [ ] ` ; ,
def t_URL(t):
    r'http(s)?://[^\s<>{}|^~\[\]`;,]+'
    return t

# Atoms - https://tools.ietf.org/html/rfc5322#section-3.2.3

# Simple tokens defined as plain regex strings (no action code needed).
# NOTE(review): in Python's `re`, `\s` already matches `\t`, so `[\s\t]` is
# equivalent to `\s`; left unchanged to stay aligned with upstream.
t_FWSP = r'([\s\t]*\r\n)?[\s\t]+' # folding whitespace
t_AT = r'\@' # '@'
t_DOT = r'\.' # '.'
t_COMMA = r'\,' # ','
t_LANGLE = r'\<' # '<'
t_RANGLE = r'\>' # '>'
t_SEMICOLON = r'\;' # ';'

# Regex fragments that match one non-ASCII character. They are combined into
# _UNICODE_CHAR and interpolated into the ATOM / DTEXT / QTEXT / QPAIR / CTEXT
# patterns in this module.
if six.PY2:
    # Python 2: byte ranges for 2-, 3- and 4-byte sequences (presumably
    # because the lexer input is a UTF-8 encoded byte string there -- confirm
    # against the callers).
    _UTF8_2 = r'[\xC2-\xDF][\x80-\xBF]'
    _UTF8_3 = (r'(\xE0[\xA0-\xBF][\x80-\xBF]'
               r'|[\xE1-\xEC][\x80-\xBF]{2}'
               r'|\xED[\x80-\x9F][\x80-\xBF]'
               r'|[\xEE-\xEF][\x80-\xBF]{2}'
               r')')

    _UTF8_4 = (r'(\xF0[\x90-\xBF][\x80-\xBF]{2}'
               r'|[\xF1-\xF3][\x80-\xBF]{3}'
               r'|\xF4[\x80-\x8F][\x80-\xBF]{2}'
               r')')
else:
    # Python 3: the same three groups expressed as code point ranges.
    # U+D800-U+DFFF is not covered by any range.
    _UTF8_2 = r'[\u0080-\u07ff]'
    _UTF8_3 = (r'([\u0800-\u0fff]'
               r'|[\u1000-\ucfff]'
               r'|[\ud000-\ud7ff]'
               r'|[\ue000-\uffff]'
               r')')
    _UTF8_4 = (r'([\U00010000-\U0003ffff]'
               r'|[\U00040000-\U000fffff]'
               r'|[\U00100000-\U0010ffff]'
               r')')

# Alternation over the three groups: any single non-ASCII character.
_UNICODE_CHAR = '({}|{}|{})'.format(_UTF8_2, _UTF8_3, _UTF8_4)


# One or more atom characters: the listed ASCII characters or a non-ASCII
# character. The braces in the character class are doubled ('{{|}}') because
# the pattern goes through str.format(). The layout whitespace and the
# trailing '#' note inside the pattern presumably rely on PLY compiling rules
# in verbose mode -- confirm if lex.lex() is ever given custom reflags.
t_ATOM = r'''
( [a-zA-Z0-9!#$%&\'*+\-/=?^_`{{|}}~] # Visible ASCII except (),.:;<>@[\]
| {unicode_char}
)+
'''.format(unicode_char=_UNICODE_CHAR)

# NOTE: Our expression for dot_atom here differs from RFC 5322. In the RFC
# dot_atom is expressed as a superset of atom. That makes it difficult to write
# unambiguous parsing rules so we've defined it here in such a way that it
# doesn't conflict. As a result, any rules that accept dot_atom should also
# accept atom.
# Built from t_ATOM: an atom followed by one or more '.atom' groups, so a
# DOT_ATOM always contains at least one dot.
t_DOT_ATOM = r'{atom}(\.{atom})+'.format(atom=t_ATOM)


# Error handler for the default (INITIAL) lexer state: logs the offending
# token at WARNING level.
# NOTE(review): the handler does not call t.lexer.skip(); per PLY's documented
# behaviour the lexer then raises LexError instead of resuming. This looks
# intentional (reject the input) -- confirm against the callers.
# A '#' comment is used instead of a docstring to match the other rules here.
def t_error(t):
    log.warning("syntax error in default lexer, token=%s", t)

# Domain literals - https://tools.ietf.org/html/rfc5322#section-3.4.1


# '[' opens a domain literal: emit the token and switch the lexer to the
# 'domain' state. The raw string that opens the body is the token's regular
# expression (read by PLY from the docstring) and must stay the first
# statement.
def t_LBRACKET(t):
    r'\['
    t.lexer.begin('domain')
    return t


# ']' seen in the 'domain' state closes the domain literal: emit the token
# and switch the lexer back to 'INITIAL'. The raw string that opens the body
# is the token's regular expression and must stay the first statement.
def t_domain_RBRACKET(t):
    r'\]'
    t.lexer.begin('INITIAL')
    return t


# Rules for the 'domain' state (the t_domain_ prefix).
# DTEXT: one or more domain-literal text characters -- the listed ASCII
# ranges or a non-ASCII character from _UNICODE_CHAR.
t_domain_DTEXT = r'''
( [\x21-\x5A\x5E-\x7E] # Visible ASCII except '[', '\', ']',
| {unicode_char}
)+
'''.format(unicode_char=_UNICODE_CHAR)

# Same pattern as t_FWSP; repeated here for the 'domain' state.
t_domain_FWSP = r'([\s\t]*\r\n)?[\s\t]+' # folding whitespace


# Error handler for the 'domain' lexer state: logs the offending token at
# WARNING level.
# NOTE(review): the handler does not call t.lexer.skip(); per PLY's documented
# behaviour the lexer then raises LexError instead of resuming -- confirm
# this is the intended way to reject the input.
def t_domain_error(t):
    log.warning("syntax error in domain lexer, token=%s", t)

# Quoted strings - https://tools.ietf.org/html/rfc5322#section-3.2.4


# '"' opens a quoted string: emit the token and switch the lexer to the
# 'quote' state. The raw string that opens the body is the token's regular
# expression (read by PLY from the docstring) and must stay the first
# statement.
def t_DQUOTE(t):
    r'\"'
    t.lexer.begin('quote')
    return t


# '"' seen in the 'quote' state closes the quoted string: emit the token and
# switch the lexer back to 'INITIAL'. The raw string that opens the body is
# the token's regular expression and must stay the first statement.
def t_quote_DQUOTE(t):
    r'\"'
    t.lexer.begin('INITIAL')
    return t


# Rules for the 'quote' state (the t_quote_ prefix).
# QTEXT: one or more quoted-string text characters -- the listed ASCII ranges
# or a non-ASCII character from _UNICODE_CHAR.
t_quote_QTEXT = r'''
( [\x21\x23-\x5B\x5D-\x7E] # Visible ASCII except '"', '\'
| {unicode_char}
)+
'''.format(unicode_char=_UNICODE_CHAR)

# QPAIR: a backslash followed by exactly one character (visible ASCII,
# whitespace, or a non-ASCII character). Note there is no '+' on the group.
t_quote_QPAIR = r'''
\\ # '\'
( [\x21-\x7E] # Visible ASCII
| \s # ' ' technically not valid
| {unicode_char}
)
'''.format(unicode_char=_UNICODE_CHAR)

# Same pattern as t_FWSP; repeated here for the 'quote' state.
t_quote_FWSP = r'([\s\t]*\r\n)?[\s\t]+' # folding whitespace


# Error handler for the 'quote' lexer state: logs the offending token at
# WARNING level.
# NOTE(review): the handler does not call t.lexer.skip(); per PLY's documented
# behaviour the lexer then raises LexError instead of resuming -- confirm
# this is the intended way to reject the input.
def t_quote_error(t):
    log.warning("syntax error in quoted string lexer, token=%s", t)


# Comments - https://tools.ietf.org/html/rfc5322#section-3.2.2


# '(' opens a comment: emit the token and switch the lexer to the 'comment'
# state. The raw string that opens the body is the token's regular expression
# (read by PLY from the docstring) and must stay the first statement.
def t_LPAREN(t):
    r'\('
    t.lexer.begin('comment')
    return t


# ')' seen in the 'comment' state closes the comment: emit the token and
# switch the lexer back to 'INITIAL'. The raw string that opens the body is
# the token's regular expression and must stay the first statement.
# NOTE(review): the state always returns to 'INITIAL' and the CTEXT rule
# excludes '(', so nested comments do not appear to be supported -- confirm
# this is intended.
def t_comment_RPAREN(t):
    r'\)'
    t.lexer.begin('INITIAL')
    return t


# Rules for the 'comment' state (the t_comment_ prefix).
# CTEXT: one or more comment text characters -- the listed ASCII ranges or a
# non-ASCII character from _UNICODE_CHAR.
t_comment_CTEXT = r'''
( [\x21-\x27\x2A-\x5B\x5D-\x7E] # Visible ASCII except '(', ')', or '\'
| {unicode_char} )+
'''.format(unicode_char=_UNICODE_CHAR)


# Folding whitespace.
# Same pattern as t_FWSP; repeated here for the 'comment' state.
t_comment_FWSP = r'([\s\t]*\r\n)?[\s\t]+'

# Error handler for the 'comment' lexer state: logs the offending token at
# WARNING level.
# NOTE(review): the handler does not call t.lexer.skip(); per PLY's documented
# behaviour the lexer then raises LexError instead of resuming -- confirm
# this is the intended way to reject the input.
def t_comment_error(t):
    log.warning("syntax error in comment lexer, token=%s", t)


# Build the lexer
# Constructed once at import time from the rules defined in this module;
# PLY's own diagnostics go to this module's logger via `errorlog`.
lexer = lex.lex(errorlog=log)
Loading

0 comments on commit 4de4bbc

Please sign in to comment.