-
Notifications
You must be signed in to change notification settings - Fork 0
/
AutoIndexer.cpp
125 lines (105 loc) · 2.54 KB
/
AutoIndexer.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#include "AutoIndexer.h"
using namespace std;
AutoIndexer::AutoIndexer(ifstream& input, ofstream& output)
{
if (!input.good() || !output.good())
{
cout << "Error! The provided input and/or output files are invalid!" << endl;
return;
}
cout << "Beginning index creation" << endl;
loadData(input);
outputData(output);
cout << "Finished index creation" << endl;
}
void AutoIndexer::processLine(int page, String& line)
{
ArrayWrapper<String> split = line.split(' ');
String composite;
for (unsigned int i = 0; i < split.arrayLength(); i++)
{
String word = split[i].toLower();
if (word[0] == '[')
{
composite = word.substring(1, word.getLength());
}
else if (composite.getLength() > 0)
{
if (word[word.getLength() - 1] == ']')
{
String key = (composite + ' ' + word.substring(0, word.getLength() - 1)).stripCharacter([](char check)
{
return ispunct(check) && check != '+' && check != '-';
});
composite = String();
indexEntries[key].push_back(page);
}
else
{
composite += (' ' + word);
}
}
else
{
word = word.stripCharacter([](char check)
{
return ispunct(check) && check != '+' && check != '-';
});
if (indexEntries[word].indexOf(page) == indexEntries[word].getSize())
{
indexEntries[word].push_back(page);
}
}
}
}
void AutoIndexer::loadData(ifstream& input)
{
cout << "Processing Data" << endl;
int page = -1;
while (input.good())
{
String line;
getline(input, line);
if (line == "<-1>")
{
break;
}
if (line == "")
{
continue;
}
if (line[0] == '<')
{
char* c_string = line.substring(1, line.getLength() - 1).c_string();
page = atoi(c_string);
delete[] c_string;
}
else
{
processLine(page, line);
}
}
cout << "Processed Data (" << indexEntries.size() << " Index Entries)" << endl;
}
void AutoIndexer::outputData(ofstream& output)
{
cout << "Outputting Data" << endl;
char header = '-';
for (pair<String, ArrayList<int>> element : indexEntries)
{
String key = element.first;
ArrayList<int> pages = element.second;
if (header != toupper(key[0]))
{
header = toupper(key[0]);
output << '[' << header << ']' << endl;
}
output << key << ": " << pages.pop();
for (int page : pages)
{
output << ", " << page;
}
output << endl;
}
cout << "Output Data" << endl;
}