-
Notifications
You must be signed in to change notification settings - Fork 0
/
ParseAffFile.hs
86 lines (65 loc) · 2.46 KB
/
ParseAffFile.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
module ParseAffFile where
import Text.Parsec.Char
import Text.Parsec.String
import Text.Parsec.Combinator
import Text.Parsec.Prim
--import Text.Parsec.Token
import CommonType
import Data.Maybe
import Data.Map as M hiding (map, mapMaybe)
import Data.List
import Data.Function
----for main----
--import System.Environment
--import Control.Monad
-- | Collect all rules that share a 'LetterIdx' into a single entry.
--
-- The pairs are sorted by index first ('sortBy' is stable, so rules under the
-- same index keep their relative input order) and runs of equal indices are
-- then merged.  The result holds one entry per distinct index, in ascending
-- index order.
groupLetterIdx :: [(LetterIdx, AffRule)] -> [(LetterIdx, [AffRule])]
groupLetterIdx pairs =
  [ (idx, rule : map snd rest)
  | (idx, rule) : rest <- groupBy sameIdx (sortBy byIdx pairs)
  ]
  where
    -- 'groupBy' never yields an empty group, so the cons pattern above
    -- matches every group.
    sameIdx = (==) `on` fst
    byIdx = compare `on` fst
-- | Forget the error side of an 'Either': 'Right' becomes 'Just', 'Left'
-- becomes 'Nothing'.
either2Maybe :: Either b a -> Maybe a
either2Maybe = either (const Nothing) Just

-- | Split the input into lines, run 'pAffFileLine' on each one and keep only
-- the results of the lines that parsed.
--
-- Lines that fail to parse are dropped without any report.  The parser is not
-- required to reach the end of a line, so trailing text after a successfully
-- parsed rule is ignored.
getLines :: Alphabet -> String -> [(LetterIdx, AffRule)]
getLines alph contents = mapMaybe parseLine (lines contents)
  where
    -- The empty string is the source name Parsec would use in error messages;
    -- those messages are discarded by 'either2Maybe'.
    parseLine = either2Maybe . parse (pAffFileLine alph) ""
-- | Parser for a single affix line: a prefix rule ('pPfx') or, if that does
-- not match, a suffix rule ('pSfx').
--
-- Each alternative is wrapped in 'try', so a failed attempt rewinds the input
-- before the next alternative is tried.
pAffFileLine :: Alphabet -> Parser (LetterIdx, AffRule)
pAffFileLine alph = prefixLine <|> suffixLine
  where
    prefixLine = try (pPfx alph)
    suffixLine = try (pSfx alph)
-- | Parse a prefix rule: the literal keyword @PFX@ followed by the fields read
-- by 'pRules'.  The replacement rule and the condition are wrapped in the
-- 'Pfx' constructor and paired with the rule's index letter.
pPfx :: Alphabet -> Parser (LetterIdx, AffRule)
pPfx alph = string "PFX" >> fmap asPrefix (pRules alph)
  where
    asPrefix (idx, repl, cond) = (idx, Pfx repl cond)
-- | Parse a suffix rule: the literal keyword @SFX@ followed by the fields read
-- by 'pRules'.  The replacement rule and the condition are wrapped in the
-- 'Sfx' constructor and paired with the rule's index letter.
--
-- Mirrors 'pPfx'; the explicit signature matches that parser's.
pSfx :: Alphabet -> Parser (LetterIdx, AffRule)
pSfx alph = do
  -- The keyword itself carries no information, so its result is discarded
  -- explicitly.
  _ <- string "SFX"
  (idx, repl, cond) <- pRules alph
  return (idx, Sfx repl cond)
-- | Parse the fields that follow the @PFX@ / @SFX@ keyword and return them as
-- a triple @(index letter, replacement rule, condition)@.
--
-- Each field may be preceded by any amount of whitespace (including none).
-- The index letter must be a single ASCII letter, @A@-@Z@ or @a@-@z@.
pRules alph = do
  idx <- spaces >> oneOf (['A' .. 'Z'] ++ ['a' .. 'z'])
  repl <- spaces >> pReplRule alph
  cond <- spaces >> pRegexRule alph
  return (idx, repl, cond)
-- | Translate a string into letters by looking each character up in the
-- alphabet.  Characters that have no entry are skipped, so the result may be
-- shorter than the input.
fromAlphabet :: Alphabet -> String -> [Letter]
fromAlphabet alph str = [l | c <- str, Just l <- [M.lookup c alph]]
-- | Parse the two fields of a replacement rule and build a 'ReplRule' from
-- their letters.
--
-- The first field is one or more characters, each either @0@ or a key of the
-- alphabet; the second field is one or more keys of the alphabet.  Whitespace
-- between the two fields is skipped.
--
-- Hack: @0@ needs no special case because 'fromAlphabet' drops every character
-- that has no alphabet entry, so a lone @0@ turns into the empty list.  This
-- relies on @0@ not being a key of the alphabet.
--
-- NOTE(review): @0@ is accepted only in the first field, not in the second --
-- confirm that this asymmetry is intended.
pReplRule alph = do
  firstField <- many1 (oneOf ('0' : alphabetChars))
  spaces
  secondField <- many1 (oneOf alphabetChars)
  return (ReplRule (fromAlphabet alph firstField) (fromAlphabet alph secondField))
  where
    alphabetChars = keys alph
-- | Parse a condition as a sequence of zero or more elements, each read by
-- 'pRegexRuleElement'.  An empty sequence is accepted.
pRegexRule :: Alphabet -> Parser RegexRule
pRegexRule = many . pRegexRuleElement
-- | Parse one element of a condition and return the letters it stands for.
--
-- The alternatives are tried in this order: a single alphabet character
-- ('pLetterAsList'), a bracketed list ('pRegexList'), a dot ('pRegexDot').
pRegexRuleElement :: Alphabet -> Parser [Letter]
pRegexRuleElement alp =
  choice
    [ pLetterAsList alp
    , pRegexList alp
    , pRegexDot alp
    ]
-- | Parse a single character that is a key of the alphabet and return its
-- letter wrapped in a one-element list.
pLetterAsList alph = fmap lookupOne (oneOf (keys alph))
  where
    lookupOne c = fromAlphabet alph [c]
-- | Parse a bracketed character class of a condition, such as @[aeiou]@ or
-- @[^aeiou]@, into the list of letters it admits.
--
-- A class whose first character is @^@ is a complemented set: the result is
-- every distinct letter of the alphabet except the listed ones.  Without this
-- case the @^@ would simply be discarded by 'fromAlphabet' (it normally has no
-- alphabet entry) and @[^y]@ would be read as @[y]@.
--
-- Characters that are not keys of the alphabet are ignored in both forms, and
-- an empty class @[]@ yields the empty list.
pRegexList :: Alphabet -> Parser [Letter]
pRegexList alph = do
  chars <- between (string "[") (string "]") (many (noneOf "[]"))
  return $ case chars of
    '^' : excluded -> complementOf (fromAlphabet alph excluded)
    _ -> fromAlphabet alph chars
  where
    -- Sorted, duplicate-free letters of the alphabet: the universe a
    -- complemented class is taken against.
    allLetters = [l | (l : _) <- group (sort (M.elems alph))]
    complementOf excluded = [l | l <- allLetters, l `notElem` excluded]
-- | Parse a literal @.@ and return every letter of the alphabet: the map's
-- values, sorted and with duplicates removed.
pRegexDot alph = string "." >> return distinctLetters
  where
    -- 'group' never produces an empty run, so the cons pattern always matches;
    -- an empty alphabet simply gives an empty list.
    distinctLetters = [l | (l : _) <- group (sort (M.elems alph))]