-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtyposcan.c
208 lines (168 loc) · 5.91 KB
/
typoscan.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
/*
* This file is part of typoscan.
*
* typoscan is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation,
* either version 3 of the License, or (at your option) any
* later version.
*
* typoscan is distributed in the hope that it will be
* useful, but WITHOUT ANY WARRANTY; without even the
* implied warranty of MERCHANTABILITY or FITNESS FOR A
* PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public
* License along with typoscan. If not, see
* <http://www.gnu.org/licenses/>.
*/
#include <config.h>
#include <curl/curl.h>
#include <errno.h>
#include <getopt.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "error.h"
#include "progname.h"
#include "typoscan.h"
/* Typo regular expression table.  None of these three objects is
   referenced anywhere else in this file.
   NOTE(review): presumably regexalloccount is the number of allocated
   slots in regexes and regexcount the number of slots in use -- confirm
   against the code that fills the table. */
struct typo_regex *regexes;
int regexalloccount;
int regexcount;

/* Command line options.  run_verbose is switched to 1 by main () when
   -v / --verbose is given. */
int run_verbose = 0;
/* Growable byte buffer that accumulates downloaded data.  buf points
   to size valid bytes and stays NULL while nothing has been stored.
   The contents are not NUL-terminated. */
struct Buffer
{
  char *buf;
  size_t size;
};

/* libcurl write callback (installed with CURLOPT_WRITEFUNCTION):
   append the size * nmemb bytes at CONTENTS to the struct Buffer that
   USERP points to.

   Returns the number of bytes consumed, which is always size * nmemb.
   If the buffer cannot be enlarged, the program is terminated through
   error () with exit status 1. */
static size_t writememorycallback (void *contents, size_t size, size_t nmemb, void *userp)
{
  size_t realsize = size * nmemb;
  struct Buffer *b = userp;

  /* libcurl documents that the callback may be invoked with zero
     bytes.  Asking the allocator for 0 bytes may legitimately yield
     NULL, which would be misreported as "out of memory", so an empty
     chunk is acknowledged without touching the buffer. */
  if (realsize == 0)
    return 0;

  /* realloc (NULL, n) behaves like malloc (n), so no special case is
     needed for the first chunk.  The result goes into a temporary so
     that b->buf is only replaced once the allocation has succeeded. */
  char *grown = realloc (b->buf, b->size + realsize);
  if (!grown)
    error (1, errno, "out of memory");

  memcpy (grown + b->size, contents, realsize);
  b->buf = grown;
  b->size += realsize;
  return realsize;
}
int gettypolist (const char *URL, struct Buffer *b)
{
b->buf = NULL;
b->size = 0;
/* Init the curl session. */
CURL *curl_handle = curl_handle = curl_easy_init();
/* Specify URL to get. */
curl_easy_setopt (curl_handle, CURLOPT_URL, URL);
/* Set data callback. */
curl_easy_setopt (curl_handle, CURLOPT_WRITEFUNCTION, writememorycallback);
/* Set callback parameter. */
curl_easy_setopt (curl_handle, CURLOPT_WRITEDATA, (void *) b);
/* Set User-Agent header. */
curl_easy_setopt (curl_handle, CURLOPT_USERAGENT, "http://tools.wmflabs.org/typoscan/");
/* Set Accept-Encoding header. */
curl_easy_setopt (curl_handle, CURLOPT_ACCEPT_ENCODING, "gzip,deflate");
/* Get content. */
CURLcode res = curl_easy_perform (curl_handle);
/* Check for errors. */
if (res != CURLE_OK)
error (1, 0, "curl_easy_perform() failed: %s",
curl_easy_strerror (res));
/* Check response code. */
long http_code;
curl_easy_getinfo (curl_handle, CURLINFO_RESPONSE_CODE, &http_code);
if (http_code != 200)
error (1, 0, "Didn't get 200, but %ld.\n", http_code);
return 1;
}
/* Print the usage summary, including every option that main ()
   accepts, to standard output and terminate the program with exit
   status 0.  Does not return. */
void help (void)
{
  puts ("Usage: typoscan [--help] [--version] [--verbose]\n"
        "                [--typos-pattern-file FILE]\n"
        " \n"
        "Retrieve a list of regular expressions from \n"
        "http://en.wikipedia.org/wiki/WP:AWB/T, read a Wikipedia dump file on \n"
        "STDIN and output a list of all page titles that match any of the \n"
        "regular expressions to STDOUT. Diagnostic output is directed to \n"
        "STDERR. \n"
        " \n"
        "Options:\n"
        "  -h, --help                     display this help and exit\n"
        "  -t, --typos-pattern-file FILE  read the regular expressions from FILE\n"
        "                                 instead of retrieving them\n"
        "  -v, --verbose                  run in verbose mode\n"
        "  -V, --version                  output version information and exit\n"
        " \n"
        "Report bugs to: [email protected] \n"
        PACKAGE " home page: <http://tools.wmflabs.org/typoscan/>");
  exit (0);
}
void version (void)
{
puts (PACKAGE_STRING " \n" \
"Copyright (C) 2013 Tim Landscheidt \n" \
"License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html> \n" \
"This is free software: you are free to change and redistribute it. \n" \
"There is NO WARRANTY, to the extent permitted by law.");
exit (0);
}
int main (int argc, char *argv [])
{
struct Buffer b;
struct option long_options [] =
{
{"help", no_argument, NULL, 'h'},
{"typos-pattern-file", required_argument, NULL, 't'},
{"verbose", no_argument, NULL, 'v'},
{"version", no_argument, NULL, 'V'},
{NULL, 0, NULL, 0}
};
int c, option_index = 0;
char *typos_pattern_filename = NULL;
/* Set program name. */
set_program_name (argv [0]);
while ((c = getopt_long (argc, argv, "ht:vV", long_options, &option_index)) != - 1)
switch (c)
{
case 'h':
help ();
break;
case 't':
if (!typos_pattern_filename)
typos_pattern_filename = optarg;
else
error (1, 0, "option --typos-pattern-file given more than once");
break;
case 'v':
run_verbose = 1;
break;
case 'V':
version ();
break;
default:
/* Unknown option. An error message has already
been printed by getopt_long (), so we can just
exit here. */
return 1;
}
if (optind != argc)
error (1, 0, "no arguments allowed");
if (typos_pattern_filename)
{
FILE *f = fopen (typos_pattern_filename, "r");
if (!f)
error (1, errno, "cannot open typos pattern file '%s'", typos_pattern_filename);
typolist_scan_file (f);
if (fclose (f))
error (1, errno, "cannot close typos pattern file '%s'", typos_pattern_filename);
}
else
{
/* Initialize curl. */
curl_global_init (CURL_GLOBAL_ALL);
/* Get typo list. */
if (!gettypolist ("https://en.wikipedia.org/w/index.php?title=Wikipedia:AutoWikiBrowser/Typos&action=raw", &b))
error (2, 0, "couldn't get typo list");
/* Clean up curl. */
curl_global_cleanup ();
/* Parse typo regular expressions. */
typolist_scan_buffer (b.buf, b.size);
}
/* Match against STDIN. */
dumpscanner_scan ();
return 0;
}