-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathchucknorrisfact-spider.php
91 lines (83 loc) · 1.98 KB
/
chucknorrisfact-spider.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
<?php
/**
 * Chuck Norris facts spider.
 *
 * Requests pages 1 .. $max - 1 from the JSON endpoint below, collects the
 * "fact" field of every entry and writes the collection to
 * chucknorrisfacts.json as a JSON array of strings.
 *
 * Progress is echoed to stdout. A page that cannot be fetched or decoded is
 * reported on stderr and skipped, so one bad response does not abort the run.
 * The script exits with status 1 if the result cannot be encoded or written.
 */

$endpoint = "https://chucknorrisfacts.fr/api/get?data=";
// Exclusive upper bound for the page counter.
$max = 998;
$saved = [];

for ($crawled = 1; $crawled < $max; $crawled++) {
    echo $crawled . " \n";

    $getted = file_get_contents($endpoint . "page:{$crawled}");
    if ($getted === false) {
        file_put_contents('php://stderr', "Page {$crawled}: request failed, skipping\n");
        continue;
    }

    // Decode the raw payload first: json_decode() resolves \uXXXX escapes
    // itself, and decoding HTML entities before this point could turn an
    // entity such as &quot; into a bare quote that breaks the JSON syntax.
    $facts = json_decode($getted, true);
    if (!is_array($facts)) {
        $reason = json_last_error_msg();
        file_put_contents('php://stderr', "Page {$crawled}: invalid JSON ({$reason}), skipping\n");
        continue;
    }

    foreach ($facts as $value) {
        // Ignore entries that do not carry a textual "fact" field.
        if (!is_array($value) || !isset($value['fact']) || !is_string($value['fact'])) {
            continue;
        }
        // Entities are resolved per fact, on the decoded string, so the
        // result is plain UTF-8 text with no markup left to post-process.
        $saved[] = html_entity_decode($value['fact'], ENT_QUOTES | ENT_HTML5, 'UTF-8');
    }
}

$savedCount = count($saved);
echo "\n \n Total : {$savedCount} \n";

// JSON_UNESCAPED_UNICODE emits non-ASCII characters literally, which makes a
// hand-written \uXXXX => character replacement table unnecessary.
$json = json_encode($saved, JSON_UNESCAPED_UNICODE);
if ($json === false) {
    file_put_contents('php://stderr', "Could not encode facts: " . json_last_error_msg() . "\n");
    exit(1);
}

if (file_put_contents('chucknorrisfacts.json', $json) === false) {
    file_put_contents('php://stderr', "Could not write chucknorrisfacts.json\n");
    exit(1);
}

echo "\n\n";
echo "\n \n SAVED! \n";