-
Notifications
You must be signed in to change notification settings - Fork 1
/
index.js
49 lines (39 loc) · 3.63 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
var solr = require('solr');
var solrConf = { core:'/rfc-index', port:8080 };
var solrClient = solr.createClient(solrConf);
var fetch = require('./app/fetch').fetch
var extract = require('./app/extract')
var Process = require('./app/process').process
// RFC numbers to process. catchAll (defined further down) indexes into this
// array and hands each entry to fetch() and process.run().
var rfcs = [2001, 2003, 2004, 2005, 2008, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2027, 2028, 2029, 2031, 2032, 2033, 2034, 2035, 2036, 2037, 2038, 2039, 2042, 2043, 2047, 2048, 2049, 2050, 2101, 2102, 2103, 2105, 2106, 2107, 2108, 2109, 2110, 2111, 2112, 2113, 2114, 2115, 2118, 2119, 2120, 2121, 2122, 2123, 2124, 2125, 2126, 2127, 2128, 2129, 2130, 2131, 2132, 2133, 2134, 2135, 2136, 2137, 2138, 2139, 2140, 2141, 2142, 2143, 2145, 2146, 2147, 2148, 2149, 2152, 2158, 2159, 2160, 2161, 2164, 2170, 2171, 2172, 2173, 2175, 2176, 2177, 2179, 2180, 2181, 2182, 2183, 2184, 2185, 2186,
2187, 2189, 2190, 2191, 2192, 2193, 2195, 2197, 2198, 2200, 2201, 2202, 2203, 2204, 2205, 2206, 2207, 2208, 2209, 2210, 2211, 2212, 2213, 2214, 2215, 2216, 2217, 2218, 2220, 2221, 2222, 2223, 2224, 2225, 2226, 2228, 2229, 2230, 2231, 2232, 2233, 2234, 2235, 2236, 2237, 2238, 2239, 2240, 2241, 2242, 2243, 2245, 2247, 2248, 2249, 2250, 2251, 2252, 2253, 2254, 2255, 2256, 2257, 2258, 2259, 2260, 2261, 2262, 2263, 2266, 2267, 2268, 2269, 2270, 2271, 2272, 2273, 2274, 2275, 2276, 2277, 2278, 2280, 2281, 2282, 2283, 2284, 2285, 2286, 2287, 2288, 2289, 2290, 2291, 2292, 2293, 2294, 2295, 2296,
2300, 2301, 2302, 2303, 2304, 2305, 2306, 2307, 2308, 2309, 2310, 2311, 2312, 2313, 2314, 2315, 2316, 2317, 2318, 2319, 2320, 2321, 2322, 2323, 2324, 2325, 2327, 2328, 2330, 2331, 2332, 2333, 2334, 2335, 2336, 2337, 2338, 2339, 2340, 2341, 2342, 2343, 2344, 2345, 2346, 2347, 2348, 2349, 2350, 2351, 2352, 2353, 2354, 2355, 2356, 2357, 2358, 2359, 2360, 2361, 2362, 2363, 2364, 2365, 2366, 2367, 2368, 2369, 2370, 2371, 2372, 2373, 2374, 2375, 2376, 2377, 2378, 2379, 2380, 2381, 2382, 2383, 2384, 2385, 2386, 2387, 2388, 2389, 2390, 2391, 2392, 2393, 2394, 2395, 2396, 2397, 2398, 2400, 2401,
2402, 2403, 2404, 2405, 2406, 2407, 2408, 2409, 2410, 2411, 2412, 2413, 2414, 2415, 2416, 2417, 2418, 2419, 2420, 2421, 2422, 2424, 2425, 2426, 2427, 2428, 2429, 2430, 2431, 2432, 2433, 2434, 2435, 2436, 2437, 2438, 2439, 2440, 2441, 2442, 2443, 2444, 2445, 2446, 2447, 2448, 2449, 2450, 2451, 2452, 2453, 2454, 2455, 2456, 2457, 2458, 2460, 2461, 2462, 2463, 2464, 2465, 2466, 2467, 2468, 2469, 2470, 2471, 2472, 2473, 2474, 2475, 2476, 2478, 2479, 2480, 2481, 2483, 2485, 2486, 2487, 2488, 2489, 2490, 2491, 2492, 2493, 2494, 2495, 2496, 2498, 2501, 2502, 2504, 2518, 2616, 2617, 2629,
2717, 2718, 2731, 2774, 2965, 3080, 3081, 3117, 3195, 3253, 3288, 3339, 3340, 3341, 3342, 3349, 3470, 3529, 3620, 3648, 3683, 3744, 3920, 3921, 3922, 3923, 3986, 4122, 4234, 4287, 4316, 4331, 4417, 4437, 4455, 4469, 4622, 4709, 4770, 4791, 4834, 4836, 4854, 5068, 5322];
// Assemble the processing pipeline.
// NOTE: inside this file the name `process` refers to this pipeline object and
// shadows Node's global `process`.
var process = new Process();

// Extraction steps, registered through crunch() in exactly this order.
[
  extract.parseDom,
  extract.getFrontTexts,
  extract.getCategory,
  extract.getKeywords,
  extract.getFirstSection,
  extract.getFulltext,
  extract.getReferences
].forEach(function (step) {
  process.crunch(step);
});

// Handler 1: hand the document to Solr. A failed add is only logged; the
// commit is issued afterwards in either case.
process.use(function indexInSolr(id, doc) {
  solrClient.add(doc, function onAdded(err) {
    if (err) {
      console.log(err);
    }
    solrClient.commit();
  });
});

// Handler 2: print the id that was just handled.
process.use(function logId(id, doc) {
  console.log(id);
});
/**
 * Walk the `rfcs` list backwards starting at index `i`: fetch one entry, run
 * it through the pipeline, then schedule the previous index 500 ms later.
 *
 * @param {number} i - Index into `rfcs` to start from. An index past the end
 *   of the list is clamped to the last entry; a negative or non-numeric index
 *   ends the walk without fetching anything.
 */
var catchAll = function (i) {
  // Clamp: an index beyond the last entry would hand `undefined` to fetch().
  var index = Math.min(i, rfcs.length - 1);
  // Written as a negated `>=` so that NaN stops the walk as well.
  if (!(index >= 0)) return;

  // Read the id once so fetch() and process.run() always see the same value.
  var id = rfcs[index];

  // Continuation: move on to the previous entry after a 500 ms pause.
  var next = function () {
    setTimeout(function () { catchAll(index - 1); }, 500);
  };

  // `next` is also passed as fetch()'s third argument — presumably its
  // failure callback, so one bad entry does not stop the walk (TODO confirm
  // against ./app/fetch).
  fetch(id, function (content) {
    process.run(id, content, next);
  }, next);
};
// Start at the last valid index: rfcs[rfcs.length] is undefined, so starting
// one past the end would ask fetch() for a non-existent entry.
catchAll(rfcs.length - 1);
// NOTE(review): this commit is issued right after the walk is kicked off. If
// fetch() is asynchronous (TODO confirm) nothing has been added yet at this
// point; the add handler registered above already commits after each document.
solrClient.commit();