-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathIndexingFile1.hs
149 lines (119 loc) · 5.05 KB
/
IndexingFile1.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
-- This is the source code module for 12.30, 12.31 (lambda expressions), 12.33, 12.37, 12.38 and 12.40.
module File1
(Doc, Line, Word1, lines1, modifyCapEqui, modifyCapEqui_Intermediate,
numLines, numWords, whitespace, splitWords, split, dropSpace, getWord, dropWord, allNumWords, shorten,
sortLs, orderPair, orderPair1, makeLists, amalgamate, printIndex, showIndex, makeIndex)
where
import Data.Char
-- A whole document held as a single string; lines1 splits it at '\n'.
type Doc = String
-- One line of a document.
type Line = String
-- One word of a line, as produced by splitWords.
type Word1 = String
-- The original code uses the default lines function; this version is
-- modified according to 12.30.
-- Split a document into its lines at each '\n'. The newline characters
-- themselves are not kept, and an empty document gives no lines.
lines1 :: Doc -> [Line]
lines1 [] = []
lines1 doc = firstLine : lines1 remaining
  where
    notNewline = (/= '\n')
    -- Everything up to (not including) the first newline.
    firstLine = takeWhile notNewline doc
    -- Skip the first line, then the single newline that ended it (if any).
    remaining = drop 1 (dropWhile notNewline doc)
-- The original program is modified according to 12.40.
-- Convert every line to lower case by applying modifyCapEqui_Intermediate
-- to each of them.
-- Question: This doesn't work well for proper names like "Amelia". What could you do about that?
-- Answer: One approach is to check whether the previous word was the article "the". If it was, the word can be the name of a place or object.
modifyCapEqui :: [Line] -> [Line]
modifyCapEqui xs = [ modifyCapEqui_Intermediate line | line <- xs ]
-- Convert every character of a string to lower case (via toLower).
modifyCapEqui_Intermediate :: String -> String
modifyCapEqui_Intermediate text = [ toLower ch | ch <- text ]
-- This is a function from the original program.
-- Pair every line with its line number, counting from 1.
-- The numbers come from the unbounded range [1 ..] rather than
-- [1 .. length l], so the list is walked only once and the result is
-- produced lazily; zip stops when the lines run out.
numLines :: [Line] -> [(Int , Line)]
numLines l = zip [1 ..] l
-- This is a function from the original program.
-- Break a single numbered line into its words (using splitWords) and
-- attach the line number to each word.
numWords :: (Int , Line) -> [(Int , Word1)]
numWords (number , line) = map tagWithLine (splitWords line)
  where
    tagWithLine word = (number , word)
-- This is a definition from the original program.
-- This is the whitespace declaration: the characters that dropSpace,
-- getWord and dropWord treat as separators between words (blank, newline,
-- tab and the listed punctuation marks).
whitespace :: String
whitespace = " \n\t;:.,\'\"!?()-"
-- The following set of functions is used by the numWords function.
-- These are functions from the original program.
-- Split a string into words: leading separator characters are removed
-- first, then split does the actual cutting.
splitWords :: String -> [Word1]
splitWords = split . dropSpace
-- Cut a string into words: take the front word, drop it together with the
-- separators that follow it, and repeat on what is left.
-- If the string starts with a separator character the first word returned
-- is empty; splitWords avoids that by calling dropSpace beforehand.
split :: String -> [Word1]
split "" = []
split st = firstWord : split afterGap
  where
    firstWord = getWord st
    afterGap = dropSpace (dropWord st)
-- This is a function from the original program.
-- Drop the leading whitespace characters (those listed in whitespace)
-- from the string; the rest of the string is returned unchanged.
dropSpace :: String -> String
dropSpace = dropWhile isGap
  where
    isGap ch = ch `elem` whitespace
-- This is a function from the original program.
-- Get the front word of the string: the characters up to, but not
-- including, the first one listed in whitespace.
getWord :: String -> String
getWord = takeWhile inWord
  where
    inWord ch = ch `notElem` whitespace
-- This is a function from the original program.
-- Drop the front word of the string: the result starts at the first
-- character listed in whitespace, or is empty if there is none.
dropWord :: String -> String
dropWord = dropWhile inWord
  where
    inWord ch = ch `notElem` whitespace
-- End of the functions used by numWords.
-- This function is modified according to 12.38.
-- Break all the numbered lines into words, attach the line number to each
-- word, and then pass the result through shorten to remove words of fewer
-- than four characters.
allNumWords :: [( Int , Line)] -> [(Int , Word1)]
allNumWords numbered = shorten (concatMap numWords numbered)
-- Keep only the entries whose word is longer than three characters.
-- The line number plays no part in the decision.
shorten :: [(Int,Word1)] -> [(Int,Word1)]
shorten entries = [ entry | entry@(_, wd) <- entries, length wd > 3 ]
-- This function is modified according to 12.33.
-- Sort the list of (line number, word) pairs into the order given by
-- orderPair: by word first, then by line number.
-- The head of the list is used as the pivot. Entries identical to the
-- pivot (per orderPair1) are kept next to it, so duplicates are preserved.
sortLs :: [(Int , Word1)] -> [(Int , Word1)]
sortLs [] = []
sortLs (pivot : others) = sortLs before ++ (pivot : same) ++ sortLs after
  where
    before = filter (`orderPair` pivot) others
    same = filter (orderPair1 pivot) others
    after = filter (orderPair pivot) others
-- Strict ordering on (line number, word) pairs: the words are compared
-- first and the line numbers only break ties between equal words.
-- Tuples compare lexicographically, so swapping the components into
-- (word, line number) order gives exactly that comparison.
orderPair :: (Int , Word1) -> (Int , Word1) -> Bool
orderPair ( n1 , w1 ) ( n2 , w2 ) = ( w1 , n1 ) < ( w2 , n2 )
-- True exactly when the two pairs have the same word and the same
-- line number.
orderPair1 :: (Int , Word1) -> (Int , Word1) -> Bool
orderPair1 first second = first == second
-- This function is modified according to 12.31 (lambda expressions).
-- Make lists: turn each line number into a one-element list of line
-- numbers, leaving the word as it is.
makeLists :: [ (Int,Word1) ] -> [ ([Int],Word1) ]
makeLists pairs = map (\(lineNo, word) -> ([lineNo], word)) pairs
-- This is the original function, restructured.
-- Combine the line-number lists of adjacent entries that carry the same
-- word into one list, keeping the numbers in their original order.
-- Only neighbouring entries are merged, so the input is expected to be
-- grouped by word already (makeIndex sorts with sortLs beforehand).
-- The whole run of equal words is gathered with span and appended once;
-- this avoids the left-nested (l1 ++ l2) ++ l3 ... chain, whose cost grows
-- quadratically with the number of occurrences of a word.
amalgamate :: [ ([Int],Word1) ] -> [ ([Int],Word1) ]
amalgamate [] = []
amalgamate ((ls, w) : rest) = (ls ++ concatMap fst sameWord, w) : amalgamate others
  where
    -- sameWord: the immediately following entries for the same word.
    (sameWord, others) = span ((== w) . snd) rest
-- This function is developed according to 12.37.
-- Print the index to standard output, one entry per line, each entry
-- rendered by showIndex.
printIndex :: [([Int], Word1)] -> IO()
printIndex xs = sequence_ [ putStrLn (showIndex entry) | entry <- xs ]
-- Render a pair as text with its second component first, then a single
-- space, then its first component. Both parts go through show, so a String
-- component appears in quotes.
showIndex :: (Show c, Show d) => (c,d) -> String
showIndex (c,d) = unwords [show d, show c]
-- This is the original function, adjusted to the modified helper functions.
-- Make index: build the index of a document and print it.
makeIndex :: Doc -> IO()
makeIndex = buildEntries >.> printIndex   -- [([Int],Word1)] -> IO()
  where
    -- The pure stages; >.> applies them from left to right (top to bottom).
    buildEntries =
      lines1 >.>          -- Doc -> [Line]
      modifyCapEqui >.>   -- [Line] -> [Line]
      numLines >.>        -- [Line] -> [(Int,Line)]
      allNumWords >.>     -- [(Int,Line)] -> [(Int,Word1)]
      sortLs >.>          -- [(Int,Word1)] -> [(Int,Word1)]
      makeLists >.>       -- [(Int,Word1)] -> [([Int],Word1)]
      amalgamate          -- [([Int],Word1)] -> [([Int],Word1)]
-- Forward composition, as used in makeIndex: (g >.> f) applies g first
-- and then f to its result.
infixl 9 >.>
(>.>) :: (a -> b) -> (b -> c) -> (a -> c)
(g >.> f) x = f (g x)