-
Notifications
You must be signed in to change notification settings - Fork 0
/
grammar.hs
142 lines (120 loc) · 6.58 KB
/
grammar.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
{-|
Module : Grammar
Description : Data structures and helper functions for a Grammar
Copyright : (c) Samuel Williams, 2021
License : GPL-3
Maintainer : [email protected]
Stability : release
This module holds the @Grammar@ data structure definition, used for generating the "DFA" structure, as well as various various getters for that structure.
It also holds the @ScannerSpec@ structure, which is an abstraction of "Scanner" used for generating the "Scanner" instance code.
-}
module Grammar where
import Data.HashMap.Strict hiding (map)
import Data.Hashable
import Data.List
import Data.Char
-- | Grammar interpreted from the gmr file, contains all information needed to generate a "DFA"
data Grammar = Grammar { tokenDefs :: [TokenDef] -- ^ All tokens definitions for a grammar
, precLevels :: [PrecLevel] -- ^ Precedence definitions for tokens
, rules :: [Rule] -- ^ Ordered rule base
} deriving Show
-- | Direct output from parser of token definitions, to be translated to a map.
data TokenDef = TokenDef { tokenName :: String -- ^ Name used in rules
, tokenPattern :: String -- ^ Haskell pattern to match in generated code
} deriving (Show)
-- | Hashing based only on name
instance Hashable TokenDef where
hashWithSalt salt td = hashWithSalt salt $ tokenName td
-- | Equality based only on name
instance Eq TokenDef where
a == b = (tokenName a) == (tokenName b)
-- | Parameterless data type for associativity
data Associativity = LeftAssoc | RightAssoc | NonAssoc deriving (Show, Eq)
-- | Precedence level with list of tokens and associativity for those tokens
data PrecLevel = PrecLevel { levelAssociativity :: Associativity
, levelTokens :: [String] -- ^ tokens as list of @TokenDef@ names
} deriving Show
-- | Grammar Rule as a list of productions for a non terminal
data Rule = Rule { ruleName :: String -- ^ Name of non terminal
, ruleProductions :: [RuleProduction]
} deriving Show
-- | Equality based only the name
instance Eq Rule where
a == b = (ruleName a) == (ruleName b)
-- | Tokens used indirectly in @RuleProduction@, as either terminals or non terminals
data RuleTokenType = RuleNonTerminal String | RuleTerminal String deriving (Show, Eq)
-- | Modifiers for @RuleToken@, eventually replaced with extra rules
data RuleTokenModifier = RuleTokenModifierNormal -- ^ Default rule behaviour, ignored on pre-processing
| RuleTokenModifierSome (Maybe RuleTokenType) -- ^ One or more matching for a token, with optional separator token
| RuleTokenModifierMany (Maybe RuleTokenType) -- ^ Zero or more matching for a token, with optional separator token
| RuleTokenModifierOptional -- ^ Zero or one matching for a token
deriving Show
-- | Token used within @RuleProduction@, holding a @RuleTokenType@ and associated @RuleTokenModifier@
data RuleToken = RuleToken { tokenType :: RuleTokenType
, tokenModifier :: RuleTokenModifier
} deriving Show
-- | Name getter for both types of @RuleTokenType@
getTokenTypeStr :: RuleTokenType -> String
getTokenTypeStr (RuleNonTerminal str) = str
getTokenTypeStr (RuleTerminal str) = str
-- | Name getter for @RuleToken@ wrapper
getTokenStr :: RuleToken -> String
getTokenStr = getTokenTypeStr . tokenType
-- | Creates a @RuleToken@ non terminal from its name
nonTerminalToken :: String -> RuleToken
nonTerminalToken str = ruleToken $ RuleNonTerminal str
-- | Creates a @RuleToken@ terminal from a @TokenDef@ name
terminalToken :: String -> RuleToken
terminalToken str = ruleToken $ RuleTerminal str
-- | Wraps a @RuleTokenType@ with the default @RuleTokenModifier@
ruleToken :: RuleTokenType -> RuleToken
ruleToken t = RuleToken t RuleTokenModifierNormal
-- | Grammar production, describing a rule of the form @A :: a B c { haskellCode }@
data RuleProduction = RuleProduction { productionTokens :: [RuleToken] -- ^ Tokens of the production
, productionResult :: String -- ^ Resulting haskell code for reducing this production
, productionPrecToken :: (Maybe String) -- ^ Optional production precedence and associativity override
} deriving Show
-- | Copy of the scanner object but all fields are an instance of Show (specifically changing the @ParserMap@ to a @String@)
data ScannerSpec = ScannerSpec
{ specSeparateCasedIdentifiers :: Bool
, specIgnoreWhitespace :: Bool
, specIgnoreComments :: Bool
, specOperators :: [String]
, specKeywords :: [String]
, specBlockComment :: Maybe (String, String)
, specLineComment :: Maybe String
, specParserMap :: Maybe String
} deriving Show
-- | Default definition for a ScannerSpec, for easy modification via record updating
scannerSpec = ScannerSpec
{ specSeparateCasedIdentifiers=False
, specIgnoreWhitespace=True
, specIgnoreComments=True
, specOperators=[]
, specKeywords=[]
, specBlockComment=Nothing
, specLineComment=Nothing
, specParserMap=Nothing
}
-- | Takes a @ScannerSpec@ and @Grammar@ and adds default and appropriate @TokenDef@s.
-- Adds defaults for all @Token@ constructors in "Scanner", generating those for operators and keywords.
addScannerSpecTokens :: ScannerSpec -> Grammar -> Grammar
addScannerSpecTokens spec gmr = gmr{ tokenDefs=nub $ tokenDefs gmr ++ makeTokens "TokenOperator " ops ++ makeTokens "TokenKeyword " kwds ++ defaultTokens }
where
ops = specOperators spec
kwds = specKeywords spec
makeTokens prefix = fmap $ \x -> TokenDef (map toLower x) $ prefix ++ show x
defaultTokens = [ TokenDef "(" "TokenOpenParen"
, TokenDef ")" "TokenCloseParen"
, TokenDef "[" "TokenOpenSquare"
, TokenDef "]" "TokenCloseSquare"
, TokenDef "{" "TokenOpenCurly"
, TokenDef "}" "TokenCloseCurly"
, TokenDef "identifier" "TokenIdentifier $$"
, TokenDef "upperIdentifier" "TokenUpperIdentifier $$"
, TokenDef "stringLit" "TokenStringLit $$"
, TokenDef "integerLit" "TokenIntLit $$"
, TokenDef "floatLit" "TokenFloatLit $$"
, TokenDef "blockComment" "TokenBlockComment $$"
, TokenDef "lineComment" "TokenLineComment $$"
, TokenDef "whitespace" "TokenWhitespace $$" ]