This repository has been archived by the owner on Nov 28, 2023. It is now read-only.
forked from quipo/GoOse
-
Notifications
You must be signed in to change notification settings - Fork 2
/
crawler_test.go
1582 lines (1444 loc) · 220 KB
/
crawler_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
package goose
import (
"fmt"
"io/ioutil"
"reflect"
"strings"
"testing"
)
// ReadRawHTML reads the specified HTML file (article.domain) and return the content
func ReadRawHTML(a Article) string {
path := fmt.Sprintf("sites/%s.html", a.Domain)
file, err := ioutil.ReadFile(path)
if err != nil {
panic(fmt.Sprintf("cannot read %q", path))
}
return string(file)
}
// ValidateArticle validates (test) the specified article
func ValidateArticle(expected Article, removed *[]string) error {
g := New()
g.config.titleDelimiters = []string{"|", " - ", " — ", "»", ":"}
//g.config.debug = true
result, err := g.ExtractFromRawHTML(expected.FinalURL, ReadRawHTML(expected))
if nil != err {
return err
}
// DEBUG
//fmt.Printf("article := Article{\n\tDomain: %q,\n\tTitle: %q,\n\tMetaDescription: %q,\n\tCleanedText: %q,\n\tMetaKeywords: %q,\n\tCanonicalLink: %q,\n\tTopImage: %q,\n}\n\n", expected.Domain, result.Title, result.MetaDescription, result.CleanedText, result.MetaKeywords, result.CanonicalLink, result.TopImage)
//fmt.Printf("%#v\n", result.Links)
if result.Title != expected.Title {
return fmt.Errorf("article title does not match. Got '%q', Expected '%q'", result.Title, expected.Title)
}
if result.MetaLang != expected.MetaLang {
return fmt.Errorf("article language does not match. Got '%q', Expected '%q'", result.MetaLang, expected.MetaLang)
}
if result.MetaDescription != expected.MetaDescription {
return fmt.Errorf("article metaDescription does not match. Got '%q', Expected '%q'", result.MetaDescription, expected.MetaDescription)
}
if !strings.Contains(result.CleanedText, expected.CleanedText) {
fmt.Printf("EXPECTED: %s \n\n\n\nACTUAL: %s\n\n", expected.CleanedText, result.CleanedText)
return fmt.Errorf("article cleanedText does not contain %q", expected.CleanedText)
}
// check if the specified strings where properly removed
for _, rem := range *removed {
if strings.Contains(result.CleanedText, rem) {
return fmt.Errorf("article cleanedText contains %q", rem)
}
}
if result.MetaKeywords != expected.MetaKeywords {
return fmt.Errorf("article keywords does not match. Got %q\n Expected: %q", result.MetaKeywords, expected.MetaKeywords)
}
if result.CanonicalLink != expected.CanonicalLink {
return fmt.Errorf("article CanonicalLink does not match. Got %q, Expected '%q'", result.CanonicalLink, expected.CanonicalLink)
}
if result.TopImage != expected.TopImage {
return fmt.Errorf("article topImage does not match. Got %q, Expected %q", result.TopImage, expected.TopImage)
}
if expected.Links != nil && !reflect.DeepEqual(result.Links, expected.Links) {
return fmt.Errorf("article Links do not match. Got \n\t%#v, \nExpected \n\t%#v", result.Links, expected.Links)
}
return nil
}
func TestGetCharsetFromContentType(t *testing.T) {
tt := []struct {
input string
expected string
}{
{"text/html;charset=UTF-8", "UTF-8"},
{"text/html,charset=EUC-KR", "EUC-KR"},
{"text/xhtml;charset=UTF-8", "UTF-8"},
{"application/xhtml+xml;charset=EUC-KR", "EUC-KR"},
{"text/plain;charset=EUC-KR", "EUC-KR"},
{"text/xml;charset=EUC-KR", "EUC-KR"},
{"text/javascript;charset=EUC-KR", "EUC-KR"},
{"application/javascript;charset=EUC-KR", "EUC-KR"},
{"application/xml;charset=EUC-KR", "EUC-KR"},
{"application/x-javascript;charset=UTF-8", "UTF-8"},
{"application/pdf;charset=UTF-8", "UTF-8"},
{"application/rss+xml;charset=UTF-8", "UTF-8"},
{"application/atom+xml;charset=UTF-8", "UTF-8"},
}
for _, tc := range tt {
t.Run(tc.input, func(t *testing.T) {
actual := getCharsetFromContentType(tc.input)
if actual != tc.expected {
t.Errorf("Unexpected result from charset extraction: EXPECTED: %s, ACTUAL: %s", tc.expected, actual)
}
})
}
}
func Test_AbcNewsGoCom(t *testing.T) {
article := Article{
Domain: "abcnews.go.com",
Title: "New Jersey Devils Owner Apologizes After Landing Helicopter in Middle of Kids' Soccer Game Forces Cancellation",
MetaDescription: "A co-owner of the NHL's New Jersey Devils said today that he's \"truly sorry\" after landing in a helicopter in the middle of a kids' soccer game in Newark.",
CleanedText: "A co-owner of the NHL's New Jersey Devils said today that he's \"truly sorry\" after landing in a helicopter in the middle of a kids' soccer game in Newark.\n\nDevils co-owner Joshua Harris said in a statement that he unexpectedly arrived in a chopper in the middle of Saint Benedict Preparatory School's soccer field Sunday night, causing many parents and kids \"frustration\" because the game ended up having to be canceled.\n\n\"I sincerely apologize to the kids and their coaches and families for the cancellation of their soccer game in Newark on Sunday night,\" said Harris, who also owns the NBA's Philadelphia 76ers. \"As a dad, who has spent hundreds of hours watching my kids play sports, I can understand the frustration, and for that, I am truly sorry.\"\n\nHelicopter 'Sounded a Little Funny' Before Crashing Into Florida Home\n\nMan Drives Car Into Ocean to Escape Police During Chase, Helicopter Video Shows\n\nNYPD Chopper Ride-Along: Here’s What Can Happen If You Fly Your Drone Near Aircraft\n\nHarris had been attending a Devils game and was indeed scheduled to land at St. Benedict's soccer field, which is regularly used as a helipad, according to an agreement with the school, a Prudential Center spokesman told ABC station WABC-TV in New York.\n\nBut the problem arose when the Devils game unexpectedly went into overtime and went into the kids' scheduled soccer game.\n\n\"Working with St. Benedict's, we have fixed the process to prevent any future issues,\" Harris said in the statement. \"While I can't take back what happened, I hope the coaches, the teams and their families would be open to being my guest at an upcoming Devils game, and I will be extending an invitation.\"\n\nThe Associated Press contributed to this report.",
MetaKeywords: "nj devils owner lands helicopter kids soccer game, helicopter youth soccer game, newark, new jersey, nj nj devils, nhl, josh harris, helicopter cancels soccer game, st benedict preparatory school, sta u13, youth soccer, us news, national news, local news",
CanonicalLink: "http://abcnews.go.com/US/nj-devils-owner-apologizes-landing-helicopter-middle-kids/story?id=35155591",
TopImage: "http://a.abcnews.go.com/images/US/ht_devils_helicopter_landing_hb_151112_16x9_992.jpg",
MetaLang: "en",
}
article.Links = []string{
"http://abcnews.go.com/topics/sports/nhl.htm",
"http://abcnews.go.com/topics/sports/hockey/new-jersey-devils.htm",
"http://abcnews.go.com/topics/sports/nba.htm",
"http://abcnews.go.com/topics/sports/basketball/philadelphia-76ers.htm",
"http://abcnews.go.com/US/helicopter-sounded-funny-crashing-florida-home/story?id=29836015",
"http://abcnews.go.com/US/nypd-chopper-ride-heres-happen-fly-drone-aircraft/story?id=33394237",
"http://abcnews.go.com/US/nypd-chopper-ride-heres-happen-fly-drone-aircraft/story?id=33394237",
"http://abc7ny.com/sports/devils-co-owners-helicopter-on-newark-field-prompts-cancelation-of-youth-soccer-game/1079546/",
}
removed := []string{"~~~REMOVED~~~"}
err := ValidateArticle(article, &removed)
if err != nil {
t.Error(err)
}
}
func Test_AdaLk(t *testing.T) {
article := Article{
Domain: "ada.lk",
Title: "Ada",
MetaDescription: "",
CleanedText: "",
MetaKeywords: "",
CanonicalLink: "",
TopImage: "http://static2.ada.lk/adaadmin/wp-content/uploads/You-I-logo.jpg",
MetaLang: "si", // Sinhala
}
//article.Links = []string{}
removed := []string{"~~~REMOVED~~~"}
err := ValidateArticle(article, &removed)
if err != nil {
t.Error(err)
}
}
func Test_BbcCom(t *testing.T) {
article := Article{
Domain: "bbc.com",
Title: "Crunch talks on new Greek bailout under way",
MetaDescription: "German and Greek finance ministers meet IMF and Eurogroup chiefs ahead of a crucial finance ministers' meeting on Greece's bailout request.",
CleanedText: "Greek bailout\n\nGreece bailout talks - in 60 secsp\n\nEuro's existential threat\n\nNothing left to lose?\n\nWhat we know\n\nThe German and Greek finance ministers are holding talks with IMF and Eurogroup chiefs ahead of a meeting of eurozone finance ministers on Friday.\n\nThe talks are aimed at striking a deal on the request made on Thursday by Greece for a new six-month bailout.\n\nGermany rejected the request despite it being welcomed by the European Commission.\n\nThe existing bailout deal expires at the end of the month and Greece could run out of money without a new accord.\n\nGermany's Wolfgang Schaeuble and Greece's Yanis Varoufakis are meeting in Brussels with IMF managing director Christine Lagarde and Jeroen Dijsselbloem, the Dutch finance minister who heads the Eurogroup.\n\nDuring a break in the talks, Mr Dijsselbloem said the situation was quite complicated: \"I am talking to the main players trying to find a solution. It will take some time, but there is still reason for some optimism, but it is still very difficult. I hope to tell you the outcome in a couple of hours time.\"\n\nThe unscheduled negotiations have delayed the start of the finance ministers' meeting, which was due to commence at 1400 GMT.\n\nArriving for the Eurogroup meeting, Mr Varoufakis said he hoped there would be a deal struck on Friday.\n\n\"The Greek government has not just gone the extra mile, but the extra 10 miles, and now we are expecting our partners not to meet us halfway, but a fifth of the way... Hopefully at the end of this, we come out with some white smoke,\" he said.\n\nMeanwhile, French President Francois Hollande reiterated that Greece belonged in the eurozone and there were no plans for it leaving, following talks in Paris with German Chancellor Angela Merkel.\n\n\"Greece is in the eurozone and it must remain in the eurozone,\" he told a joint news conference with Mrs Merkel.\n\nMrs Merkel said German politicians were \"very much geared towards Greece remaining in the euro\", adding that the Greek people had \"made a lot of sacrifices\" to do so.\n\nHowever, she said there was a need for \"significant improvements in the substance\" of the Greek request ahead of a vote in the German parliament next week.\n\nEarlier on Friday the German government's stance appeared to soften after a spokeswoman for Mrs Merkel said Greece's request for a loan extension from its eurozone partners provided \"a starting point\" for more talks.\n\n\"From the German government's point of view, [the request] is still not sufficient,\" said Christiane Wirtz. But \"it certainly offers a starting point for further talks.\"\n\nOne Greek government official described the phone call as \"constructive\", adding: \"The conversation was held in a positive climate, geared towards finding a mutually beneficial solution for Greece and the eurozone.\"\n\nGermany stands to lose up to €80bn if Greece were to leave the eurozone.\n\nAnalysis: Andrew Walker, economics correspondent\n\nGreece has certainly shifted its position. The letter from the Finance Minister, Yanis Varoufakis, to the Eurogroup asked for a six-month master financial assistance facility agreement.\n\nPayments under that agreement require Greece to comply with the measures set out in another document, the memorandum of understanding.\n\nThat is the hated economic policy programme agreed with the equally hated bailout lenders.\n\nIn the meantime, Mr Varoufakis was offering to refrain from unilateral actions that that would undermine the fiscal targets, economic recovery and financial stability and to ensure any new measures were fully funded.\n\nThose certainly look like concessions to Germany and others.\n\nWhat Berlin doesn't like is the manifest desire of the Greek government to use the proposed extension to revise the programme.\n\nGerman press 'fed up' with Greece\n\nMr Tsipras won elections in late January on a platform of rejecting the austerity measures tied to the bailout.\n\nA Greek government source said on Thursday the Eurogroup had \"just two choices: to accept or reject the Greek request. We will now discover who wants to find a solution, and who does not\".\n\nGreece formally requested a six-month extension to its eurozone loan agreement on Thursday, offering major concessions as it raced to avoid running out of cash within weeks.",
MetaKeywords: "keywords, added, to, test, case insensitive",
CanonicalLink: "http://www.bbc.com/news/business-31545115",
TopImage: "http://news.bbcimg.co.uk/media/images/81120000/jpg/_81120901_81120501.jpg",
MetaLang: "en",
}
err := ValidateArticle(article, &[]string{"~~~REMOVED~~~"})
if err != nil {
t.Error(err)
}
}
func Test_BbcCoUk(t *testing.T) {
article := Article{
Domain: "bbc.co.uk",
Title: "Homeopathy 'could be blacklisted'",
MetaDescription: "Ministers are considering whether homeopathy should be put on an NHS blacklist of banned treatments, the BBC learns.",
CleanedText: "Ministers are considering whether homeopathy should be put on a blacklist of treatments GPs in England are banned from prescribing, the BBC has learned.\n\nThe controversial practice is based on the principle that \"like cures like\", but critics say patients are being given useless sugar pills.\n\nThe Faculty of Homeopathy said patients supported the therapy.\n\nA consultation is expected to take place in 2016.\n\nThe total NHS bill for homeopathy, including homeopathic hospitals and GP prescriptions, is thought to be about £4m.\n\nHomeopathy is based on the concept that diluting a version of a substance that causes illness has healing properties.\n\nSo pollen or grass could be used to create a homeopathic hay-fever remedy.\n\nOne part of the substance is mixed with 99 parts of water or alcohol, and this is repeated six times in a \"6c\" formulation or 30 times in a \"30c\" formulation.\n\nThe end result is combined with a lactose (sugar) tablet.\n\nHomeopaths say the more diluted it is, the greater the effect. Critics say patients are getting nothing but sugar.\n\nCommon homeopathic treatments are for asthma, ear infections, hay-fever, depression, stress, anxiety, allergy and arthritis.\n\nSource: British Homeopathic Association\n\nBut the NHS itself says: \"There is no good-quality evidence that homeopathy is effective as a treatment for any health condition.\"\n\nWhat do you think about homeopathic treatments? Join our Facebook Q&A on Friday 13th November from 3pm, on the BBC News Facebook page, with the BBC website's health editor, James Gallagher.\n\nThe Good Thinking Society has been campaigning for homeopathy to be added to the NHS blacklist - known formally as Schedule 1 - of drugs that cannot be prescribed by GPs.\n\nDrugs can be blacklisted if there are cheaper alternatives or if the medicine is not effective.\n\nAfter the Good Thinking Society threatened to take their case to the courts, Department of Health legal advisers replied in emails that ministers had \"decided to conduct a consultation\".\n\nOfficials have now confirmed this will take place in 2016.\n\nSimon Singh, the founder of the Good Thinking Society, said: \"Given the finite resources of the NHS, any spending on homeopathy is utterly unjustifiable.\n\n\"The money spent on these disproven remedies can be far better spent on treatments that offer real benefits to patients.\"\n\nBut Dr Helen Beaumont, a GP and the president of the Faculty of Homeopathy, said other drugs such as SSRIs (selective serotonin reuptake inhibitors) for depression would be a better target for saving money, as homeopathic pills had a \"profound effect\" on patients.\n\nShe told the BBC News website: \"Patient choice is important; homeopathy works, it's widely used by doctors in Europe, and patients who are treated by homeopathy are really convinced of its benefits, as am I.\"\n\nThe result of the consultation would affect GP prescribing, but not homeopathic hospitals which account for the bulk of the NHS money spent on homeopathy.\n\nEstimates suggest GP prescriptions account for about £110,000 per year.\n\nAnd any decision would not affect people buying the treatments over the counter or privately.\n\nHealth Secretary Jeremy Hunt was criticised for supporting a parliamentary motion on homeopathy, but in an interview last year argued \"when resources are tight we have to follow the evidence\".\n\nMinister for Life Sciences, George Freeman, told the BBC: \"With rising health demands, we have a duty to make sure we spend NHS funds on the most effective treatments.\n\n\"We are currently considering whether or not homeopathic products should continue to be available through NHS prescriptions.\n\n\"We expect to consult on proposals in due course.\"",
MetaKeywords: "",
CanonicalLink: "http://www.bbc.co.uk/news/health-34744858",
TopImage: "http://ichef.bbci.co.uk/news/1024/cpsprodpb/B4FE/production/_86643364_m7410098-homeopathic_pills-spl.jpg",
MetaLang: "en",
}
article.Links = []string{
"http://www.britishhomeopathic.org/how-are-homeopathic-medicines-made/",
"http://www.nhs.uk/Conditions/homeopathy/Pages/Introduction.aspx#when-used",
"https://www.facebook.com/bbcnews/",
"http://www.legislation.gov.uk/uksi/2004/629/schedule/1/made",
"https://www.newscientist.com/article/dn22241-hail-jeremy-hunt-the-new-minister-for-magic/",
"http://www.lbc.co.uk/watch-jeremy-hunt-live-on-lbc-from-7pm-96835",
"https://twitter.com/JamesTGallagher",
}
removed := []string{"~~~REMOVED~~~"}
err := ValidateArticle(article, &removed)
if err != nil {
t.Error(err)
}
}
func Test_BizJournalsCom(t *testing.T) {
article := Article{
Domain: "bizjournals.com",
Title: "Activist investor sells off $1 billion worth of Microsoft stock",
MetaDescription: "ValueAct will still retain a 0.7 percent stake in Microsoft after the sale.",
CleanedText: "The San Francisco-based activist investing firm that helped pushed Steve Ballmer out of Microsoft’s top job announced Thursday it will sell some of its shares of the company.\n\nValueAct Capital bought a $2 billion stake in Microsoft 2013 and then gained a spot on the Microsoft company’s board. The firm was part of the group that forced Ballmer into retiring ahead of schedule, and ushering in a new era for the company under Satya Nadella.\n\nThat move has paid off well for shareholders, including Ballmer, who remains the company's largest shareholder. Microsoft (Nasdaq: MSFT) share prices have climbed more than 90 percent since Nadella took office. Microsoft stock is now selling for more than $53 a share, almost up to the highs the company hit ahead of the dot com crisis.\n\nNow, ValueAct will sell about a quarter of its stake or nearly 18.7 million Microsoft shares worth just under $1 billion. The firm will retain a 0.7 percent stake in the company.\n\nValueAct Capital's President G. Mason Morfit said Microsoft shares represent more than 20 percent of the firm’s overall portfolio and will sell some to diversify and buy stock in another company.\n\nMicrosoft's stock price increase is partially why the company has come to represent so much of ValueAct's portfolio. But ValueAct also was a major shareholder in Valeant Pharmaceuticals, whose stock has dropped 70 percent over the last three months, according to Forbes.\n\nMorfit will run for re-election to Microsoft’s board of directors at a shareholder’s meeting later this year. He says Microsoft will remain one of the firm’s top positions.",
MetaKeywords: "",
CanonicalLink: "http://www.bizjournals.com/seattle/blog/techflash/2015/11/activist-investor-sells-off-1-billion-worth-of.html",
TopImage: "http://media.bizj.us/view/img/2167041/mason-morfit*400xx306-307-0-25.jpg",
MetaLang: "en",
}
article.Links = []string{
"http://www.bizjournals.com/profiles/company/us/wa/redmond/microsoft_corporation/1087001",
"http://www.bizjournals.com/profiles/company/us/ca/san_francisco/valueact_capital_partners_lp/13646",
"http://www.bizjournals.com/seattle/blog/techflash/2014/03/microsoft-adds-activist-investor-to-board.html",
"http://www.bizjournals.com/seattle/print-edition/2013/08/30/not-just-a-new-ceo-steve-ballmers.html",
"http://www.bizjournals.com/profiles/company/us/ca/aliso_viejo/valeant_pharmaceuticals_international/20416",
"http://www.forbes.com/sites/antoinegara/2015/11/12/hedge-fund-valueact-hurt-by-valeant-sells-1-billion-of-surging-microsoft-stock/?utm_campaign=yahootix&partner=yahootix",
}
removed := []string{"~~~REMOVED~~~"}
err := ValidateArticle(article, &removed)
if err != nil {
t.Error(err)
}
}
/*
func Test_BlogCanpanInfo(t *testing.T) {
article := Article{
Domain: "blog.canpan.info",
Title: "5月23日(水)-笹川陽平ブログ(日本財団会長)",
MetaDescription: "",
CleanedText: "",
MetaKeywords: "",
CanonicalLink: "",
TopImage: "http://media.bizj.us/view/img/2167041/mason-morfit*400xx306-307-0-25.jpg",
MetaLang: "ja", // unfortunately this is currently recognised as zh
}
article.Links = []string{}
removed := []string{"~~~REMOVED~~~"}
err := ValidateArticle(article, &removed)
if err != nil {
t.Error(err)
}
}
*/
func Test_BlogSpotCoUK(t *testing.T) {
article := Article{
Domain: "blogspot.co.uk",
Title: "Five ways to grow your business this Small Business Week",
MetaDescription: "",
CleanedText: "Susan Brown, owner of Los Angeles gardening store Potted, recently updated her business listing on Google. Susan says, “Putting your business on Google lets people find you easily. Your directions are right there, your hours are right there, what you sell is right there.”\n\nThanks to her decision, Susan has seen more customers walk through her door: “So many of the customers that come in here find us on Google. As a small business, you want to use every opportunity to help your business grow.”\n\nNational Small Business Week is one of those opportunities. So from May 4-8, instead of three cheers, we’re giving you five—five simple ways to get your small business online and growing.\n\nCelebrating National Small Business Week with Google\n\nA handful of bright ideas and quick-fixes, all five ways are doable in a week or less and will help you throw a digital spotlight on your business all year round.\n\n1. SHOW UP ON GOOGLE\n\nCheck to see how your business shows up on Google. Then, claim your listing so that customers can find the right info about your business on Google Search and Maps. When you claim your listing this week: You could be one of 100 randomly selected businesses to get a 360° virtual tour photoshoot—a $255 value.\n\n2. LEARN FROM PROS & PEERS\n\nGet business advice from experts and colleagues in the Google Small Business Community. They're ready to chat! When you visit or join this week: Share your tips for summertime business success and we'll feature your tip in front of an audience of 400K members.\n\n3. WORK BETTER, TOGETHER:\n\nWith professional email, calendars, and docs that you can access anywhere, Google Apps for Work makes it easy for your team to create and collaborate. When you sign up this week you’ll receive 25% off Google Apps for Work for one year.\n\n4. CLAIM YOUR DOMAIN:\n\nWith a custom domain name and website, Google Domains helps you create a place for your business on the web. When you sign up and purchase a .co, .com or .company domain this week you could be one of 1,500 randomly selected businesses to get reimbursed for the first year of registration.\n\n5. GET ADVICE FROM AN ADVERTISING PRO:\n\nLearn how you can promote your business online and work with a local digital marketing expert to craft a strategy that’s right for your business goals. When you RSVP this week you’ll get help from an expert who knows businesses like yours.\n\nWhile these resources are available year-round, there’s no better time to embark on a digital reboot.\n\nFor more information, visit google.com/smallbusinessweek.\n\nWishing everyone a happy and productive Small Business Week!\n\nPS: To join the conversation, use #5Days5Ways and #SBW15 on G+, Facebook or Twitter.",
MetaKeywords: "",
CanonicalLink: "http://googlewebmastercentral.blogspot.com/2015/05/five-ways-to-grow-your-business-this.html",
TopImage: "http://3.bp.blogspot.com/-6SCcCupadL0/VUnQdhs_98I/AAAAAAAAA7Q/wCdIXm6v9Sg/s72-c/Screen%2BShot%2B2015-05-06%2Bat%2B10.22.08%2BAM.png",
MetaLang: "en",
}
article.Links = []string{
"http://gybo.com/resources",
"https://www.gybo.com/ca/mountain-view/resources#way1",
"http://gybo.com/resources#way2",
"http://gybo.com/resources#way3",
"http://gybo.com/resources#way4",
"http://gybo.com/resources#way5",
"https://www.gybo.com/ca/mountain-view/resources",
}
removed := []string{"~~~REMOVED~~~"}
err := ValidateArticle(article, &removed)
if err != nil {
t.Error(err)
}
}
func Test_BloombergCom(t *testing.T) {
article := Article{
Domain: "bloomberg.com",
Title: "U.K. Needs 'Urgent Action' to Keep Banks in London, BBA Says",
MetaDescription: "British lawmakers need to take “urgent action” to ensure the U.K. maintains its position as the leading global financial center or risk the departure of banks to cities such as Singapore and Hong Kong, according to the British Bankers’ Association.",
CleanedText: "British lawmakers need to take “urgent action” to ensure the U.K. maintains its position as the leading global financial center or risk the departure of banks to cities such as Singapore and Hong Kong, according to the British Bankers’ Association.\n\nNew regulations, taxes and depressed economic activity in Europe have resulted in an 8 percent drop in British banking jobs, with two-thirds of BBA members saying they’ve moved business elsewhere since 2010, the lobby group said in a report Friday. The BBA recommends a softening of the law separating retail operations from investment banking, further tax cuts and a reworking of visa limits to make it easier to hire from abroad.\n\n“We have now reached a watershed moment in Britain’s competitiveness as an international banking center” and “many international banks have been moving jobs overseas or deciding not to invest in the U.K.,” BBA Chief Executive Officer Anthony Browne said in the report. “Wholesale banking is an internationally mobile industry and there is a real risk this decline could accelerate.”\n\nChancellor of the Exchequer George Osborne, 44, outlined a “new settlement” for the City of London in a speech in June, pledging to curtail huge fines and amend regulations to “get the balance right.” As memories fade of the 1 trillion pounds ($1.5 trillion) of U.K. taxpayer support given to banks amid the 2008 crisis, this year the government has backed down on some issues after lobbying from the BBA, while HSBC Holdings Plc has said it may leave London.\n\nOsborne diluted a levy on U.K. banks and pushed out the regulator’s chief misconduct enforcer, Martin Wheatley, and most recently u-turned on a plan to assume senior bank managers are guilty until proven innocent, which lenders blamed for hindering recruitment of top foreign executives.\n\n“We recognize the change of tone in conduct regulation, important developments in the senior managers regime, the proposed reduction in the bank levy, greater certainty over tax for international banks,” the BBA said.\n\nNevertheless, London’s financial sector continues to shrink while its rivals grow, according to the report. Compared with 35,000 jobs losses and a 12 percent fall in U.K. banking assets in the past four years, assets in the U.S. have grown by the same percentage, while in Singapore and Hong Kong they have climbed by 24 percent and 34 percent respectively.\n\nEuropean firms are also losing market share to U.S. rivals in wholesale banking, which is the part of banks that cater to large corporates and other financial institutions. From 2010 to 2014, the wholesale market share of the top five European banks fell to 24 percent from 26 percent, whereas the share of the top five U.S. banks has risen to 48 percent from 44 percent, the BBA said.\n\nLondon is also losing market share in lending and initial public offerings, the BBA said. Wholesale banking’s global return-on-equity, a measure of profitability, is expected to fall to an average of 6.5 percent by 2017, about a third of the 18 percent-average between 2000 and 2006, according to the report, co-authored by consulting firm Oliver Wyman.\n\nOsborne’s overtures to the industry were counterbalanced by the high cost of ring-fencing -- a law that requires splitting off retail units to protect them from investment banking losses, the BBA said. “Uncertainty arising from the rapidly changing tax regime and European Union referendum are inhibiting business planning and discouraging investment,” according to the report.\n\nThe BBA’s wishlist includes a demand the Chancellor cut the bank levy faster. Under current plans the tax will be reduced over six years and then limited to domestic balance sheets until 2021. The lobby group also wants an 8 percent surcharge on bank profits to be phased out over time.\n\nFinancial services is the U.K.’s biggest export industry selling 62 billion pounds abroad every year, and employing more than 405,000 people, the BBA said.\n\nBefore it's here, it's on the Bloomberg Terminal.",
MetaKeywords: "Jobs,Banking,London",
CanonicalLink: "http://www.bloomberg.com/news/articles/2015-11-13/u-k-needs-urgent-action-to-keep-banks-in-london-bba-says",
TopImage: "http://assets.bwbx.io/images/ifXjLu6rC3Tg/v1/-1x-1.jpg",
MetaLang: "en",
}
article.Links = []string{
"http://bloom.bg/dg-ws-core-bcom-a1",
}
removed := []string{"~~~REMOVED~~~"}
err := ValidateArticle(article, &removed)
if err != nil {
t.Error(err)
}
}
func Test_businessInsiderCom(t *testing.T) {
article := Article{
Domain: "businessinsider.com",
Title: "Credit-card chips could slow Black Friday lines",
MetaDescription: "A change to how retailers process payments could make Americans stand in line longer this Black Friday.",
CleanedText: "Just when you thought there couldn't be another way to make Black Friday any more miserable for shoppers and retail employees, the credit-card industry came up with one.\n\nCredit-card companies last month began to mandate new technology that uses chips instead of magnetic stripes. It's a change made for a very good reason: card security.\n\nThe credit-card industry self-imposed October 1 as the deadline for the new card readers, though many consumers had received chip-enabled credit and debit cards — which will still work on the old \"swipe\" card processors — long before that.\n\nThe timing of this wider rollout, however, has retail and payments experts warning that this will slow things down at the checkouts on the November 27 shopping day.\n\n\"Any time you introduce a major change like this, there's going to be confusion,\" said Matt Schulz, senior industry analyst with CreditCards.com. \" There's no question this is going to cause some slowdown on Black Friday.\"\n\nThe change itself is simple: Instead of swiping the card through the magnetic-strip reader, shoppers now have to insert it — chip side up — into a slot on the bottom of the device.\n\nBut here's where the delays come in. People who are unfamiliar with the process will swipe as they always have, then be told it didn't work because they have a new chip-enabled card. Then they must be shown how to insert it, and leave it in, so the payment can be processed.\n\nNow multiply that by thousands, and add in the fact that people have been in line since the crack of dawn, elbowed their way to that bargain bin, and then had to wait again just to get to the register, and you can see why even a small delay will test patience. It's called the EMV chip, and it just might wreak havoc on holiday shopping.\n\n\"There is going to be a rude awakening\" for retailers, said Jared Drieling, business intelligence manager for The Strawhecker Group, an Omaha, Nebraska-based advisory firm focusing on payments. \"The industry is still bickering over how long an EMV transaction takes.\"\n\nAs many as 47% of US merchants will have new technology in place by the end of 2015, according to a survey conducted earlier this year by the Payments Security Task Force, an industry-backed group of financial services firms and leading retailers. Already, 40% of Americans have been issued new chip-enabled cards.\n\nOf course the nightmare scenario that Drieling is warning about is dependent on a lot of factors. Some customers have been using the chip technology for weeks, and some retailers don't have the readers yet. There is a wide disparity in how individual retailers have gotten ready for the switch.\n\nBest Buy, Macy's, and Walmart stores have been fully outfitted with new card readers, representatives for those companies said. Macy's and Walmart have also reissued store-branded credit cards with new EMV chips embedded in them. Sears, on the other hand, says it is \"continuously working to further enhance the security of our systems,\" according to a spokesman — but declined to provide specifics for Black Friday.\n\nJ. Craig Shearman, a spokesman for the National Retail Federation, said the new card readers would be at \"most major retailers and large national chains.\" The progress of smaller shops\u00a0in\u00a0adapting the chips is not as clear, but those shops\u00a0are less likely to\u00a0be open the day after Thanksgiving anyway.\n\nShearman didn't argue with the notion that things could slow down, but he said it was not clear how much longer it would take to process each transaction.\n\nFor retailers, Black Friday and the ensuing weekend is crucial to performance. Americans packed malls and stores last year after Thanksgiving, driving more than $50 billion in revenue to retailers, the National Retail Federation reported in 2014.\n\nOf course, there are lots of ways to avoid even having to find out. Stay home. Turkey and stuffing is better on day two anyway.\n\nNOW WATCH: JAMES ALTUCHER: 'Warren Buffett is a f-----g liar'",
MetaKeywords: "",
CanonicalLink: "http://www.businessinsider.com/credit-card-chips-could-slow-black-friday-lines-2015-11",
TopImage: "http://static5.businessinsider.com/image/56410a64bd86ef18008c8901/this-little-change-could-make-black-friday-even-more-miserable-this-year.jpg",
MetaLang: "en",
}
article.Links = []string{
"http://www.businesswire.com/news/home/20150504005631/en/Issuers-Forecast-U.S.-Shift-Chip-Cards-Complete",
"http://www.usatoday.com/story/money/business/2015/10/01/chip-credit-debit-card-readers-october-1/73140516/",
"http://www.businessinsider.com/james-altucher-warren-buffett-rant-holding-period-2015-10",
}
removed := []string{"~~~REMOVED~~~"}
err := ValidateArticle(article, &removed)
if err != nil {
t.Error(err)
}
}
func Test_chinaComCn(t *testing.T) {
article := Article{
Domain: "china.com.cn",
Title: "马来西亚国际航空发布全新品牌宣传片_生活_中国网",
MetaDescription: "马来西亚国际航空发布全新品牌宣传片",
CleanedText: "",
MetaKeywords: "",
CanonicalLink: "",
TopImage: "http://life.china.com.cn/style/images/logo.png",
MetaLang: "zh",
}
//article.Links = []string{}
removed := []string{"~~~REMOVED~~~"}
err := ValidateArticle(article, &removed)
if err != nil {
t.Error(err)
}
}
func Test_CnnCom(t *testing.T) {
article := Article{
Domain: "cnn.com",
Title: "Exhausted F1 star Lewis Hamilton crashes car",
MetaDescription: "After running away with the Formula One world championship, Lewis Hamilton explains he has run out of gas after crashing his car in Monaco.",
CleanedText: "Story highlights Lewis Hamilton reveals Monaco car accident on eve of Brazilian GP\n\nF1 world champion says he was exhausted and had a fever\n\nHamilton organized surprise party for his Mum after Mexico GP\n\nThe Mercedes driver revealed he crashed his car in Monaco after \"heavy partying\" last weekend. He turned up for this weekend's Brazilian Grand Prix a day late after taking time off to recover.\"I've not been well with a fever but I also had a road accident in Monaco on Monday night,\" Hamilton explained on his account.\"Nobody was hurt, which is the most important thing. I made very light contact with a stationary vehicle.\"Talking with the team and my doctor, we decided together that it was best for me to rest at home and leave a day later.\" Dear TeamLH, just wanted to let you know why things have been quiet on social media the past few days. I've not been well with a fever but I also had a road accident in Monaco on Monday night. Whilst ultimately, it is nobody's business, there are people knowing my position that will try to take advantage of the situation and make a quick buck. NO problem. Nobody was hurt, which is the most important thing. But the car was obviously damaged and I made very light contact with a stationary vehicle. Talking with the team and my doctor, we decided together that it was best for me to rest at home and leave a day later. But i am feeling better and am currently boarding the plane to Brazil. However, I am informing you because I feel we all must take responsibility for our actions. Mistakes happen to us all but what's important is that we learn from them and grow. Can't wait for the weekend Brazil🙌🏾 Bless Lewis\n\nA photo posted by Lewis Hamilton (@lewishamilton) on Nov 11, 2015 at 2:50pm PST\n\nHamilton posted the news to his fans, who he refers to as \"Team LH,\" but he also added: \"Ultimately, it is nobody's business, there are people knowing my position that will try to take advantage of the situation and make a quick buck.\"\n\nAfter arriving in Sao Paulo for the penultimate race of the 2015 season, the three-time world champion inevitably faced questions from the assembled media.\n\nJUST WATCHED \"The guy's driving like god...\" Replay More Videos ... MUST WATCH\n\n03:19 Both Hamilton and his Mercedes teammate Nico Rosberg always speak to reporters on the Thursday before a race weekend, while the British driver also has obligations with the UK press.\n\nHamilton explained that his busy schedule since the last race in Mexico 12 days ago had included throwing a surprise 60th birthday party for his mother Carmen in London last Sunday, the night before his Monaco prang.\n\n\"\"It was a result of heavy partying and not much rest for 10 days. I am a bit run down,\" Hamilton, who spent four more days in Mexico after the race, said in his BBC Sport column.\n\n\"When I got back to the UK, I was trying to organize my Mum's 60th birthday. The party turned out great but by the end of it I was exhausted. I had been busy for two solid weeks and I basically collapsed.\"\n\nJUST WATCHED How well do you know Lewis Hamilton? Replay More Videos ... MUST WATCH\n\n01:41 Although an element of mystery still surrounds Hamilton's Monaco car crash, it's not the first time the 30-year-old has been involved in driving drama off the track.\n\nAt the 2010 Australian Grand Prix, Hamilton was fined for dangerous driving after deliberately spinning his wheels and skidding on his way out of the Albert Park circuit. In 2007, when he was an F1 rookie, his car was impounded in France after he was caught speeding.\n\nHamilton, who wrapped up the 2015 world title at the U.S. Grand Prix in Austin, Texas with three races to spare, is now focused on getting back to business in Brazil.\n\n\"I feel good, I'm on an up slope, so a lot closer to 100%\" Hamilton told reporters at the Interlagos track. \"I'm excited to be here. I'm definitely cherishing the moments I'm in the car.\"\n\nTell us what you think of Hamilton's crash on CNN Sport's Facebook page",
MetaKeywords: "f1, lewis hamilton, brazilian grand prix, monaco, mercedes, motorsport, Exhausted F1 star Lewis Hamilton crashes car - CNN.com",
CanonicalLink: "http://edition.cnn.com/2015/11/13/motorsport/formula-one-lewis-hamilton-crashes-car-news/index.html",
TopImage: "http://i2.cdn.turner.com/cnnnext/dam/assets/151113115049-lewis-hamilon-media-brazil-large-169.jpg",
MetaLang: "en",
}
article.Links = []string{
"https://instagram.com/p/99kB_8L00w/",
"http://www.bbc.co.uk/sport/features/34783569",
"http://edition.cnn.com/2010/SPORT/motorsport/08/24/motorsport.f1.hamilton.fine.melbourne/",
"http://edition.cnn.com/2015/10/25/motorsport/motorsport-usgp-hamilton-vettel-rosberg/",
"https://www.facebook.com/cnnsport",
}
removed := []string{"~~~REMOVED~~~"}
err := ValidateArticle(article, &removed)
if err != nil {
t.Error(err)
}
}
func Test_DailyMailCoUk(t *testing.T) {
article := Article{
Domain: "dailymail.co.uk",
Title: "Debenhams and House of Fraser charge for PAPER BAGS as Tesco give them away",
MetaDescription: "Major high street stores including Debenhams and House of Fraser have started charging up to 10p for paper carrier bags – despite them being exempt from the new laws brought in last month.",
CleanedText: "Major high street stores have been accused of ripping off shoppers by charging up to 10p for paper carrier bags – despite them being exempt from the new laws brought in last month.\n\nOutraged shoppers have hit out at Debenhams and House of Fraser claiming they are 'cashing in' by charging for paper bags when other high street shops offer them for free.\n\nHouse of Fraser has said the charge for paper bags had been introduced for 'ethical and moral' reasons, and that all proceeds would be donated to charity.\n\nHowever, shoppers have taken to Twitter to express their anger at the charge.\n\nHouse of Fraser has said the paper bag charge has been brought in at stores for 'ethical and moral' reasons\n\nPaper bags are being handed out to shoppers at London branch of Tesco weeks after 5p charge introduced\n\nTwitter user Jimmy said: 'Absolutely disgusted! Just spent £180 on shoes and you have the audacity to make me pay 5p for a 'cardboard' bag #shocking'\n\nAnthony Bongos added: 'I can't understand why you are charging for paper carrier bags. This isn't the law, is it you cashing in on the law?'\n\nA spokesperson for House of Fraser said: 'We have made the ethical and moral decision to support the introduction of a 5p charge on all plastic and paper bags.'\n\nShoppers in Debenhams have also reported being charged to paper bags, with some saying they have been made to pay up to 10p.\n\nSuzanne Foley said: '£162 for a suit no suit bags and then get charged 10p for a large bag, what's that all about debenhams!' (sic)\n\nAnd Martena David added: '£162 for a suit no suit bags and then get charged 10p for a large bag, what's that all about debenhams!' (sic)\n\nElsewhere, some Tesco stores have started giving customers free paper bags just weeks after the 5p charge for plastic bags caused chaos around the country.\n\nTwitter uses have expressed their outrage after being made to pay for paper bags at House of Fraser\n\nWHY IS THERE A 5P CHARGE?\n\nWhy charge for bags?\n\nThe rules are being rolled out by the Government's Department for Environment, Food & Rural Affairs. It claims the change will save £60m in litter clean-up costs and £13m in carbon savings.\n\nSo where will all the money go?\n\nThe levy for supermarkets and big shops employing more than 250 staff will raise more than £70m a year for 'good causes'. Shops can also take a 'reasonable costs' cut. The Government will pocket the VAT raising an estimated £19m a year.\n\nCan I still get free bags?\n\nYes. If you have bought food such as fish, uncooked meat or prescription medicines then the retailer should still offer bags for nothing.\n\nBut problems occur if you buy anything else at the same time. For example, if the bag shares space with a packet of cornflakes it will cost you 5p. You should not be charged if a shop uses paper bags.\n\nA London store has been handing out recyclable small bags as an alternative to shoppers just picking up a handful of groceries.\n\nThe bags feature the phrase 'love food hate waste'.\n\nThe new law does not prevent shops from handing out free paper bags, a source from the Department for Food, Environment and Rural Affairs told the Evening Standard.\n\n'The key thing is encouraging people to reuse bags,' they said.\n\n'The best thing to do is to have a plastic bag in your pocket.\n\n'But clearly paper bags can be recycled and do degrade better than plastic bags, and they won't end up strangling a turtle.'\n\nEngland was the last place in the UK to introduce the 5p bag charge.\n\nSome supermarkets around the UK where forced to put security tags on baskets and trolleys after shoppers began taking them home to carry their groceries.\n\nMailOnline has contacted Debenhams and House of Fraser for comment.\n\nDebenhams has been accused of ripping off customers across the UK by charging up to 10p for paper bags\n\nMOST WATCHED NEWS VIDEOS\n\nPrevious\n\n1\n\n2\n\n3\n\nNext\n\nCalamitous cat Mog stars in Sainsbury's Christmas campaign\n\nQVC underwear models in tight briefs video goes viral\n\nJosh Cooper viciously beaten in viral Facebook video\n\nNew video shows horror after Bernado Elbaz jumps overboard\n\nWatch this adorable kitten bounce for joy as owner returns\n\nShocking footage of pigs being abused in pig slaughterhouse\n\nWhite man and Asian girlfriend abused for being together\n\nHigh winds see passenger plane veer from side to side\n\nBernardo Elbaz filmed moments before he went overboard\n\nJames Hausman is hit in head by glass door on cruise ship\n\nSandi Thom in Radio 2 playlist rant: 'Shove it up your a**'\n\nChilling video shows angry mob lynching robbers in Venezuela\n\n'Oh my God': Sobbing ex-private schoolgirl screams to her...\n\nJihadi John is 'evaporated' at ISIS' 'crucifixion'...\n\nKATIE HOPKINS: Sprinkle your Movember moustache with petrol...\n\nBungling council workers block off family's driveway with...\n\nAre Europe's open borders on the brink of collapse? EU...\n\nTory minister accused of 'insulting' the poor apologises for...\n\n'What have you done? You murdered him!': New video shows...\n\nFamily of Indian grandmother scalded to death by shower in...\n\nThe moment some of the Nazis' most notorious murderers were...\n\n'You have a horse face. Your girlfriend is a whore':...\n\n'My husband wants him to hang but I still love my son - even...\n\nMob of teenage thugs surround boy, 12, as he is thrown to...\n\nMOST READ NEWS\n\nPrevious\n\nNext\n\n●\n\n●\n\n●",
MetaKeywords: "Debenhams,House,Fraser,charge,PAPER,BAGS,Tesco,started,giving,away,free",
CanonicalLink: "http://www.dailymail.co.uk/news/article-3316789/Debenhams-House-Fraser-charge-PAPER-BAGS-Tesco-started-giving-away-free.html",
TopImage: "http://i.dailymail.co.uk/i/pix/2015/11/13/10/2E6847FA00000578-0-image-a-9_1447409694956.jpg",
MetaLang: "en",
}
article.Links = []string{
"http://www.standard.co.uk/news/uk/tesco-is-giving-out-paper-bags-to-dodge-the-5p-carrier-bag-charge-a3112131.html",
}
removed := []string{"~~~REMOVED~~~"}
err := ValidateArticle(article, &removed)
if err != nil {
t.Error(err)
}
}
func Test_DailyMailCoUk2(t *testing.T) {
article := Article{
Domain: "dailymail.co.uk.2",
Title: "England midfielder Ruben Loftus-Cheek relishing World Cup challenge",
MetaDescription: "Ruben Loftus-Cheek enjoyed the pressure and responsibility of trying to keep Crystal Palace up - and relishes the added load that comes with playing for England at the World Cup.",
CleanedText: "",
MetaKeywords: "England,midfielder,Ruben,Loftus,Cheek,relishing,World,Cup,challenge",
CanonicalLink: "http://www.dailymail.co.uk/sport/football/article-5764541/England-midfielder-Ruben-Loftus-Cheek-relishing-World-Cup-challenge.html",
TopImage: "http://i.dailymail.co.uk/i/newpix/2018/05/23/23/4C8C357300000578-0-image-a-28_1527113316154.jpg",
MetaLang: "en",
}
//article.Links = []string{}
removed := []string{"~~~REMOVED~~~"}
err := ValidateArticle(article, &removed)
if err != nil {
t.Error(err)
}
}
func Test_DiaridegironaCat(t *testing.T) {
article := Article{
Domain: "diaridegirona.cat",
Title: "Valtonyc fuig d'Espanya per evitar la presó el dia que l'anuncia l'(a)phònica",
MetaDescription: "Valtonyc ha abandonat l'Estat espanyol per evitar entrar a presó. El festival (a)phònica de Banyoles va donar ahir a conèixer tots els detalls de la programació del que serà la 15a edició, que incloïa també l'actuació de Valtonyc, el raper que ha d'e",
CleanedText: "",
MetaKeywords: "Valtonyc,fuig,dEspanya,evitar,preso,lanuncia,laphonica,Cultura,noticia,noticies,noticies avui,diari de girona,catalunya,diari catala,diari.",
CanonicalLink: "http://www.diaridegirona.cat/cultura/2018/05/24/valtonyc-fuig-despanya-evitar-preso/915484.html",
TopImage: "https://fotos00.diaridegirona.cat/2018/05/23/690x278/valtonyc-fuig.jpg",
MetaLang: "ca",
}
//article.Links = []string{}
removed := []string{"~~~REMOVED~~~"}
err := ValidateArticle(article, &removed)
if err != nil {
t.Error(err)
}
}
// this test works on a website that embeds someone else's content like a frame
func Test_EpaperNavbharattimesWithFrame(t *testing.T) {
article := Article{
Domain: "epaper.navbharattimes.com",
Title: "Details",
MetaDescription: "",
CleanedText: "",
MetaKeywords: "",
CanonicalLink: "",
TopImage: "/images/dnsenlarge.gif",
MetaLang: "hi",
}
//article.Links = []string{}
removed := []string{"~~~REMOVED~~~"}
err := ValidateArticle(article, &removed)
if err != nil {
t.Error(err)
}
}
// Relative image test
func Test_MatchExactDescriptionMetaTag(t *testing.T) {
article := Article{
Domain: "vnexpress.net",
Title: "Khánh Ly đến viếng mộ Trịnh Công Sơn",
MetaDescription: "Chiều 1/5, danh ca mang theo đóa hoa hồng vàng và chai rượu đến thăm người bạn tri kỷ sau lần gặp gỡ cuối cùng vào năm 2000. - VnExpress Giải Trí",
CleanedText: "",
MetaKeywords: "Khánh Ly đến viếng mộ Trịnh Công Sơn - VnExpress Giải Trí",
CanonicalLink: "http://giaitri.vnexpress.net/tin-tuc/gioi-sao/trong-nuoc/khanh-ly-den-vieng-mo-trinh-cong-son-2985539.html",
FinalURL: "http://giaitri.vnexpress.net/tin-tuc/gioi-sao/trong-nuoc/khanh-ly-den-vieng-mo-trinh-cong-son-2985539.html",
TopImage: "http://l.f11.img.vnecdn.net/2014/05/02/2-5456-1398995030_490x294.jpg",
MetaLang: "vi",
}
article.Links = []string{
"http://giaitri.vnexpress.net/tin-tuc/nhac/lang-nhac/khanh-ly-se-tham-mo-trinh-cong-son-khi-ve-viet-nam-2981844.html",
"http://giaitri.vnexpress.net/tin-tuc/gioi-sao/trong-nuoc/sao-viet-buc-xuc-vi-bi-su-dung-hinh-anh-trai-phep-3293246.html",
"http://giaitri.vnexpress.net/tin-tuc/gioi-sao/trong-nuoc/thien-than-7-tuoi-cua-guong-mat-than-quen-nhi-3294189.html",
"http://giaitri.vnexpress.net/tin-tuc/gioi-sao/quoc-te/guong-mat-bien-doi-theo-thoi-gian-cua-huynh-hieu-minh-3294360.html",
"http://giaitri.vnexpress.net/photo/trong-nuoc/duc-hai-lien-tuc-hon-khanh-my-tren-tham-do-3294882.html",
"http://giaitri.vnexpress.net/tin-tuc/gioi-sao/quoc-te/khong-ai-nhan-ra-nguoi-vo-gia-cu-richard-gere-3294588.html",
"http://giaitri.vnexpress.net/photo/trong-nuoc/duc-hai-lien-tuc-hon-khanh-my-tren-tham-do-3294882.html",
"http://giaitri.vnexpress.net/photo/trong-nuoc/duc-hai-lien-tuc-hon-khanh-my-tren-tham-do-3294882.html",
"http://giaitri.vnexpress.net/tin-tuc/gioi-sao/trong-nuoc/thien-than-7-tuoi-cua-guong-mat-than-quen-nhi-3294189.html",
"http://giaitri.vnexpress.net/photo/trong-nuoc/kyo-york-chup-anh-keu-goi-bao-ve-moi-truong-3294700.html",
"http://giaitri.vnexpress.net/photo/trong-nuoc/kyo-york-chup-anh-keu-goi-bao-ve-moi-truong-3294700.html",
"http://giaitri.vnexpress.net/tin-tuc/gioi-sao/trong-nuoc/trong-hieu-idol-ngu-guc-trong-long-bo-3294837.html",
"http://giaitri.vnexpress.net/photo/trong-nuoc/ngoc-diem-khoe-con-gai-5-tuoi-3294807.html",
"http://giaitri.vnexpress.net/photo/trong-nuoc/con-trai-truong-quynh-anh-do-danh-con-gai-xuan-lan-3294397.html",
"http://giaitri.vnexpress.net/tin-tuc/gioi-sao/trong-nuoc/huong-ly-toi-khong-ngac-nhien-khi-chien-thang-next-top-3294195.html",
"http://giaitri.vnexpress.net/tin-tuc/gioi-sao/trong-nuoc/dam-vinh-hung-hat-o-le-cuoi-cua-40-doi-vo-chong-khuyet-tat-3294598.html",
"http://giaitri.vnexpress.net/photo/trong-nuoc/vo-chong-tuan-hung-du-dam-cuoi-vu-duy-khanh-3294280.html",
"http://giaitri.vnexpress.net/photo/trong-nuoc/diem-my-9x-khoe-hinh-the-khi-tap-vo-3293926.html",
"http://giaitri.vnexpress.net/tin-tuc/gioi-sao/trong-nuoc/luong-viet-quang-toi-that-bai-vi-qua-tu-tin-vao-giong-hat-3292227.html",
"http://giaitri.vnexpress.net/tin-tuc/gioi-sao/trong-nuoc/sao-viet-buc-xuc-vi-bi-su-dung-hinh-anh-trai-phep-3293246.html",
"http://giaitri.vnexpress.net/photo/trong-nuoc/ha-tran-om-con-nhun-nhay-theo-nhac-duoi-mua-3293618.html",
"http://giaitri.vnexpress.net/tin-tuc/gioi-sao/trong-nuoc/cuoc-song-sau-bao-benh-cua-chu-van-quenh-3291824.html",
}
err := ValidateArticle(article, &[]string{"~~~REMOVED~~~"})
if err != nil {
t.Error(err)
}
}
func Test_EconomistCom(t *testing.T) {
article := Article{
Domain: "economist.com",
Title: "Renting hotel rooms by the hour: A quick in and out",
MetaDescription: "A Spanish hotel-reservation platform that allows customers to book rooms in three hour slots is looking to expand into Britain.",
CleanedText: "BYHOURS, a Spanish hotel-reservation platform that allows customers to rent rooms in three-hour slots, is looking to expand into Britain. Travelmole p that the website aims to sign up 25 hotels in the country by the end of the month, although so far only six have taken the plunge.\n\nMany people, when bringing to mind short-stay hotel rooms, will no doubt picture businessmen with their cinq-à-septs or, perhaps, company a little more transactional than that. Banish such grubby thoughts from your minds; having the option of booking a bedroom for three hours is a great and practical idea.\n\nIt is no coincidence that several of the establishments that have signed up with ByHours are close to airports and train stations. How often have you had several hours to kill at an airport and longed for a place to shower and snooze? And Gulliver has written before about that horrible dead time when, having checked out of a hotel in the morning, with your flight not until late in the evening, you have ages to kill wandering around a strange town dragging a wheely-bag. Then there are those day trips when you fly in to town at some ungodly early hour and are scheduled to fly out at an equally uncivilised late one; how much more pleasant if you could pop your head down for a few hours in the afternoon? In fact you needn’t even be a visitor. Back when Gulliver's daughter was a sleep-averse baby, he would have paid handsomely for the chance to close his eyes for an hour in a short-stay hotel during his lunch break.\n\nIt is also easy to see why it would appeal to hotels, which could sweat their assets more, filling gaps between guests checking out and in. According to Travelmole, in Spain last year more than 150,000 bookings were made through ByHours at more than 1,500 hotels. However, for the consumer the big drawback would appear to be pricing. Prices for a three-hour stay in London tomorrow start at €50 and quickly hit the hundreds. That is understandable. By its nature it is often likely to be a last-minute purchase, and hotels will obviously price very short reservations at a premium. But the more hotels that sign up, the easier it will be to find something more budget friendly.",
MetaKeywords: "",
CanonicalLink: "http://www.economist.com/blogs/gulliver/2015/04/renting-hotel-rooms-hour",
TopImage: "https://www.economist.com/sites/default/files/images/guliver.png",
MetaLang: "en",
}
article.Links = []string{
"http://www.travelmole.com/news_feature.php?news_id=2016292",
"http://content.time.com/time/magazine/article/0,9171,843018,00.html",
"http://www.economist.com/blogs/gulliver/2013/04/surreptitious-snoozing",
}
err := ValidateArticle(article, &[]string{"~~~REMOVED~~~"})
if err != nil {
t.Error(err)
}
}
func Test_EditionCnnCom(t *testing.T) {
article := Article{
Domain: "edition.cnn.com",
Title: "What if you could make anything you wanted?",
MetaDescription: "Massimo Banzi's pocket-sized open-source circuit board has become a key building block in the creation of a huge variety of innovative devices.",
CleanedText: "In the 20th century, getting your child a toy car meant a trip to a shopping mall.",
MetaKeywords: "",
CanonicalLink: "http://www.cnn.com/2012/07/08/opinion/banzi-ted-open-source/index.html",
TopImage: "http://i2.cdn.turner.com/cnn/dam/assets/120706022111-ted-cnn-ideas-massimo-banzi-00003302-story-top.jpg",
MetaLang: "en",
}
article.Links = []string{
"http://blog.ted.com/2012/06/26/open-source-your-projects-and-upload-them-to-space-massimo-banzi-at-tedglobal-2012/",
"http://www.cnn.com/video/#/video/us/2012/07/06/ted-massimo-banzi-arduino.ted",
"http://gizmodo.com/5822319/a-chilean-teen-tweets-about-earthquakes-better-than-his-whole-government",
"http://mattrichardson.com/blog/2011/08/17/the-enough-already/",
"http://www.botanicalls.com/",
"http://code.google.com/p/arducopter/wiki/ArduCopter",
"http://www.ted.com/talks/boaz_almog_levitates_a_superconductor.html ",
"http://www.ted.com",
"http://dontapscott.com/",
"http://www.ted.com/talks/don_tapscott_four_principles_for_the_open_world_1.html",
"http://www.youtube.com/watch?v=yNAGkSbt1xI",
"http://genspace.org/person/Ellen%20D./Jorgensen,%20Ph.D.",
"http://www.marcgoodman.net/",
"http://www.nyls.edu/faculty/faculty_profiles/beth_simone_noveck",
"http://itp.tisch.nyu.edu/object/ShirkyC.html",
"http://www.ted.com/talks/clay_shirky_how_cognitive_surplus_will_change_the_world.html",
"http://edition.cnn.com/2012/06/15/world/europe/uk-school-dinner-blog/index.html",
"http://www.twitter.com/CNNOpinion",
"http://www.facebook.com/CNNOpinion",
}
err := ValidateArticle(article, &[]string{"~~~REMOVED~~~"})
if err != nil {
t.Error(err)
}
}
func Test_EntrepreneurCom(t *testing.T) {
article := Article{
Domain: "entrepreneur.com",
Title: "6 Thoughts on Why Facing Your Fears Could Help You Achieve Massive Success",
MetaDescription: "Fire-breathing dragons are a good reason to be afraid. Business fears, maybe not so much.",
CleanedText: "Everyone has fears. They’re important, and they’ve helped keep us alive throughout our evolution. Think about the fears\u00a0characters understandably\u00a0feel at certain points in\u00a0Game of Thrones, the hugely successful HBO dramatic series which\u00a0combines elements of medieval times and fantasy. We're talking outrageously murderous kings here, plus scheming\u00a0lords and ladies. Large men with even larger swords. Even fire-breathing dragons.\n\nRelated:\u00a0Why Fear Is the Entrepreneur's Best Friend\n\nIn Season One of GOT, a great line\u00a0illustrates the point about fears perfectly. The speaker is\u00a0Robb Stark, eldest son of the lord of Winterfell and generally a good guy, who\u00a0decides to declare war and march south to Kings Landing, the capital of the Seven (usually warring) Kingdoms and home to\u00a0a lot more of those men with swords . Theon\u00a0Greyjoy, the son of another royal house,\u00a0asks Stark if he’s afraid. And Stark, his hands trembling, replies,\u00a0“I guess I must be.” To which\u00a0Greyjoy’s response is perfect:\u00a0“Good, that means you’re not stupid.”\n\nIt certainly was appropriate for the denizens of GOT's medieval era to be afraid, but does the same apply to you? For, while fear was an important factor in our hereditary past, in our modern day and age, our fears today\u00a0are often based more in psychology\u00a0than\u00a0actual physical threats. Drawing on some of the books I've enjoyed, I offer\u00a0six thoughts on why facing your fears will assist you in creating massive success.\n\nI've had a lot of worries in my life, most of which never happened.\u00a0- -Mark Twain\n\nWhen you take the time to actually define your fears, you\u00a0learn to separate fact from fiction. This is an important distinction. Some things you’re afraid of will be valid, but many will be mental worst-case scenarios that have simply spiraled further in your mind than they ever will or would in reality.\n\nWhat about the fears on your list that you’ve defined that are actually valid, like losing a client or\u00a0employee, gettng backlash from a layoff\u00a0or encountering some other tangible fear?\u00a0Easy. When you face fears that have merit -- now that you’ve defined them --\u00a0you can come up with an action plan of responses to mitigate the damages.\n\nThink of this list as your \"fear emergency\u00a0plan.\" You know what you’d do in the case of a fire or earthquake, so why not enact a plan of appropriate responses you could take against some of your more valid business\u00a0fears?\n\nRelated:\u00a07 Ways to Think Differently About Fear\n\nBran thought about it. \"Can a man still be brave if he's afraid?\" \"That is the only time a man can be brave,\"\u00a0his father told him.” -- George R.R. Martin, series author, A Song of Ice and Fire, on which HBO's GOT series is based.\n\nPerhaps I’m just missing Game of Thrones in the offseason, but this quote really struck me and is an important facet of facing your fears. You don’t develop bravery and courage in the good times, you develop them when you actually confront fears. If you were once afraid of starting your own business, but did it anyway, you know the terror, but also the reward, that comes from facing fears head on. Your courage grows with each fear you face.\n\nThere is wisdom that comes from the experience of working through fears. Some of your fears may have even come true. If you are a business owner and have seen your business falter or fail, perhaps you’ve already lived through adversity. The silver lining of these experiences is that you learn from them. Wisdom comes from all of life’s experiences, but the fearful or bad ones in particular teach us great lessons. Wisdom is always the by-product of facing your fears, and that’s an important quality to develop.\n\nDealing with fears helps your develop compassion. When you yourself have been afraid,\u00a0you’re more likely to have patience and feel compassion toward others experiencing similar situations. After all, we all want a good life. When you push hard for what you want, and experience the joys and failures of success, you learn compassion you can use to help others push through their early fears.\n\nYou can put yourself in\u00a0the shoes of someone who is just starting out, and that empathy can help guide that person to have deeper courage.\n\n“Life doesn't get easier or more forgiving;\u00a0we get stronger and more resilient.” -- Steve Maraboli, Life, the Truth, and Being Free\n\nResilience comes from facing your fears. You become better than your surroundings and transform yourself above the fear and into bigger and bigger success. Resiliience starts with you, and it begins in your mind. Face your fears and learn to rise to face whatever is in front of you.\n\nRelated:\u00a0What Companies Can Learn From 'Game of Thrones' When Hiring Their Next Chief Information Officer",
MetaKeywords: "Growth Strategies,Fear,Success Stories,Courage",
CanonicalLink: "http://www.entrepreneur.com/article/252739",
TopImage: "https://assets.entrepreneur.com/content/3x2/822/20151112203147-fire-breathing-dragon.jpeg",
MetaLang: "en",
}
article.Links = []string{
"http://www.entrepreneur.com/article/239581",
"https://www.youtube.com/watch?v=fNxvFgysbvU",
"http://www.entrepreneur.com/article/244277",
"http://www.entrepreneur.com/article/247456",
}
removed := []string{"~~~REMOVED~~~"}
err := ValidateArticle(article, &removed)
if err != nil {
t.Error(err)
}
}
func Test_ExampleCom(t *testing.T) {
article := Article{
Domain: "example.com",
Title: "Example HTML Page TITLE",
MetaDescription: "Example page for testing",
CleanedText: "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.\n\nexample 1 link content\n\nexample 2 link content\n\nDuis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.\n\nSed ut perspiciatis unde omnis iste natus error sit voluptatem accusantium doloremque laudantium, totam rem aperiam, eaque ipsa quae ab illo inventore veritatis et quasi architecto beatae vitae dicta sunt explicabo.",
MetaKeywords: "example,testing",
CanonicalLink: "http://www.example.com/index.html",
TopImage: "/example_top_image.png",
MetaLang: "en",
}
article.Links = []string{
"http://www.example.com/page1.html",
"http://www.example.com/page2.html",
}
removed := []string{
"~HTMLComment~",
"~div_id_hidden~",
"~div_class_hidden~",
"~div_name_hidden~",
"~style_display_none~",
"~style_visibility_hidden~",
"~~~REMOVED~~~"}
err := ValidateArticle(article, &removed)
if err != nil {
t.Error(err)
}
}
// Facebook photo
func Test_FacebookCom(t *testing.T) {
article := Article{
Domain: "facebook.com",
Title: "Facebook - Facebook's Photos",
MetaDescription: "Stay connected with all of your groups with the new Facebook Groups app. Learn more: http://www.facebookgroups.com",
CleanedText: "",
MetaKeywords: "",
CanonicalLink: "https://www.facebook.com/facebook/photos/a.376995711728.190761.20531316728/10153398878696729/",
TopImage: "https://fbcdn-sphotos-g-a.akamaihd.net/hphotos-ak-xpa1/v/t1.0-9/p180x540/10408016_10153398878696729_8237363642999953356_n.png?oh=c6ae71220447f363ec41ea54c38341e1&oe=55B6D827&__gda__=1436749528_5c72e92a5105c1cc6df97163a64e72ce",
MetaLang: "en",
}
article.Links = []string{
"https://www.facebook.com/facebook?fref=photo",
"http://l.facebook.com/l.php?u=http%3A%2F%2Fwww.facebookgroups.com%2F&h=gAQEbndf0&enc=AZNwbqa7wrhRCkIAQcDAt9ivI6lNENnpagDgNd4WzF4di3sKJDzKaxBVXeEChFPdrgWkyEHV0H7Kj9a3Y2PWgHbuGr2k_yamwC5KvANw_2Mq5X8ySXJaGXXj22haJvHJhrw-5IFcBmFwRJnUG1t9DHx9&s=1",
}
err := ValidateArticle(article, &[]string{"~~~REMOVED~~~"})
if err != nil {
t.Error(err)
}
}
func Test_FocusDe(t *testing.T) {
article := Article{
Domain: "focus.de",
Title: "Landespolizeidirektion: #Kuschelwuschel ist wieder bei seiner Familie!",
MetaDescription: "Ihr seid echt Bärenstark!\n\n Dank eurer Mithilfe hat die Familienzusammenführung geklappt.",
CleanedText: "",
MetaKeywords: "Regional Nachrichten , Thüringen, Landespolizeidirektion",
CanonicalLink: "https://www.focus.de/regional/thueringen/landespolizeidirektion-kuschelwuschel-ist-wieder-bei-seiner-familie_id_8799171.html",
TopImage: "https://p5.focus.de/img/thueringen/crop8799170/1662713622-w1200-h627-o-q75-p5/8ddd6fab-459a-47b8-9598-6d29df6566d4.jpg",
MetaLang: "de",
}
//article.Links = []string{
//}
err := ValidateArticle(article, &[]string{"~~~REMOVED~~~"})
if err != nil {
t.Error(err)
}
}
func Test_ForbesCom(t *testing.T) {
article := Article{
Domain: "forbes.com",
Title: "The World's Most Expensive Passports [Infographic]",
MetaDescription: "Passports are valuable and expensive items, with the price of applying for one varying tremendously by nationality. The U.S. passport may seem expensive with a $110 application fee and a $25 acceptance fee adding up to $135 in total. According to a report by Go Euro, however, American travellers actually [...]",
CleanedText: "",
MetaKeywords: "Lifestyle,Lists,On The Move,Travel",
CanonicalLink: "http://www.forbes.com/sites/niallmccarthy/2015/11/13/the-worlds-most-expensive-passports-infographic/",
TopImage: "http://blogs-images.forbes.com/niallmccarthy/files/2015/11/20151109_Passports_Fo.jpg",
MetaLang: "en",
}
//article.Links = []string{""}
removed := []string{"~~~REMOVED~~~"}
err := ValidateArticle(article, &removed)
if err != nil {
t.Error(err)
}
}
func Test_FoxNewsCom(t *testing.T) {
article := Article{
Domain: "foxnews.com",
Title: "Party insiders give Clinton early, commanding delegate edge",
MetaDescription: "Hillary Rodham Clinton has locked up public support from half of the Democratic insiders who cast ballots at the party's national convention, giving her a commanding advantage over her rivals for the party's presidential nomination.",
CleanedText: "Published November 13, 2015 Associated Press\n\nHillary Rodham Clinton has locked up public support from half of the Democratic insiders who cast ballots at the party's national convention, giving her a commanding advantage over her rivals for the party's presidential nomination.\n\nClinton's margin over Vermont Sen. Bernie Sanders and former Maryland Gov. Martin O'Malley is striking. Not only is it big, but it comes more than two months before primary voters head to the polls -- an early point in the race for so many of the people known as superdelegates to publicly back a candidate.\n\n\"She has the experience necessary not only to lead this country, she has experience politically that I think will help her through a tough campaign,\" said Unzell Kelley, a county commissioner from Alabama.\n\n\"I think she's learned from her previous campaign,\" he said. \"She's learned what to do, what to say, what not to say -- which just adds to her electability.\"\n\nThe Associated Press contacted all 712 superdelegates in the past two weeks, and heard back from more than 80 percent. They were asked which candidate they plan to support at the convention next summer.\n\nThe 712 superdelegates make up about 30 percent of the 2,382 delegates needed to clinch the Democratic nomination. That means that more than two months before voting starts, Clinton already has 15 percent of the delegates she needs.\n\nThat sizable lead reflects Clinton's advantage among the Democratic Party establishment, an edge that has helped the 2016 front-runner build a massive campaign organization, hire top staff and win coveted local endorsements.\n\nSuperdelegates are convention delegates who can support the candidate of their choice, regardless of who voters choose in the primaries and caucuses. They are members of Congress and other elected officials, party leaders and members of the Democratic National Committee.\n\nClinton is leading most preference polls in the race for the Democratic nomination, most by a wide margin. Sanders has made some inroads in New Hampshire, which holds the first presidential primary, and continues to attract huge crowds with his populist message about income inequality.\n\nBut Sanders has only recently started saying he's a Democrat after a decades-long career in politics as an independent. While he's met with and usually voted with Democrats in the Senate, he calls himself a democratic socialist.\n\n\"We recognize Secretary Clinton has enormous support based on many years working with and on behalf of many party leaders in the Democratic Party,\" said Tad Devine, a senior adviser to the Sanders campaign. \"But Sen. Sanders will prove to be the strongest candidate, with his ability to coalesce and bring young people to the polls the way that Barack Obama did.\"\n\n\"The best way to win support from superdelegates is to win support from voters,\" added Devine, a longtime expert on the Democrats' nominating process.\n\nThe Clinton campaign has been working for months to secure endorsements from superdelegates, part of a strategy to avoid repeating the mistakes that cost her the Democratic nomination eight years ago.\n\nIn 2008, Clinton hinged her campaign on an early knockout blow on Super Tuesday, while Obama's staff had devised a strategy to accumulate delegates well into the spring.\n\nThis time around, Clinton has hired Obama's top delegate strategist from 2008, a lawyer named Jeff Berman, an expert on the party's arcane rules for nominating a candidate for president.\n\nClinton's increased focus on winning delegates has paid off, putting her way ahead of where she was at this time eight years ago. In December 2007, Clinton had public endorsements from 169 superdelegates, according to an AP survey. At the time, Obama had 63 and a handful of other candidates had commitments as well from the smaller fraction of superdelegates willing to commit to a candidate.\n\n\"Our campaign is working hard to earn the support of every caucus goer, primary voter and grassroots and grasstop leaders,\" said Clinton campaign spokesman Jesse Ferguson. \"Since day one we have not taken this nomination for granted and that will not change.\"\n\nSome superdelegates supporting Clinton said they don't think Sanders is electable, especially because of his embrace of socialism. But few openly criticized Sanders and a handful endorsed him.\n\n\"I've heard him talk about many subjects and I can't say there is anything I disagree with,\" said Chad Nodland, a DNC member from North Dakota who is backing Sanders.\n\nHowever, Nodland added, if Clinton is the party's nominee, \"I will knock on doors for her. There are just more issues I agree with Bernie.\"\n\nSome superdelegates said they were unwilling to publicly commit to candidates before voters have a say, out of concern that they will be seen as undemocratic. A few said they have concerns about Clinton, who has been dogged about her use of a private email account and server while serving as secretary of state.\n\n\"If it boils down to anything I'm not sure about the trust factor,\" said Danica Oparnica, a DNC member from Arizona. \"She has been known to tell some outright lies and I can't tolerate that.\"\n\nStill others said they were won over by Clinton's 11 hours of testimony before a GOP-led committee investigating the attack on a U.S. consulate in Benghazi, Libya. Clinton's testimony won widespread praise as House Republicans struggled to trip her up.\n\n\"I don't think that there's any candidate right now, Democrat or Republican, that could actually face up to that and come out with people shaking their heads and saying, `That is one bright, intelligent person,\"' said California Democratic Rep. Tony Cardenas.",
MetaKeywords: "Democratic National Committee,Hillary Rodham Clinton,Barack Obama,presidential primary,primary voters,superdelegates",
CanonicalLink: "http://www.foxnews.com/politics/2015/11/13/party-insiders-give-clinton-early-commanding-delegate-edge/",
TopImage: "http://a57.foxnews.com/global.fncstatic.com/static/managed/img/fn2/video/0/0/111215_otr_clinton_1280.jpg",
MetaLang: "en",
}
article.Links = []string{
"http://www.ap.org/",
}
removed := []string{"~~~REMOVED~~~"}
err := ValidateArticle(article, &removed)
if err != nil {
t.Error(err)
}
}
func Test_GloboesporteGloboCom(t *testing.T) {
article := Article{
Domain: "globoesporte.globo.com",
Title: "Rodrigo Caio treina até nas férias e tenta acelerar retorno aos gramados",
MetaDescription: "Rodrigo Caio treina na esteira durante as férias em Dracena-SP (Foto: Divulgação)Rodrigo Caio quer ganhar tempo na recuperação da lesão que sofreu no joelho esquerdo. Apesar de ter sido liberado pelo departamento médico do São Paulo para as férias, o ...",
CleanedText: "Rodrigo Caio treina na esteira durante as férias em Dracena-SP (Foto: Divulgação) Rodrigo Caio quer ganhar tempo na recuperação da lesão que\n\nsofreu no joelho esquerdo. Apesar de ter sido liberado pelo departamento médico\n\ndo São Paulo para as férias, o jogador vem treinando diariamente para acelerar\n\na recuperação após ser submetido a uma cirurgia.\n\nO zagueiro e volante passa férias com a família em Dracena, interior\n\nde São Paulo, e alterna os períodos de descanso com uma rotina de\n\nexercícios. Ele vem realizando trabalhos de reforço muscular e corridas na\n\nesteira.\n\nO jogador lesionou o joelho esquerdo no dia 2 de agosto,\n\ncontra o Criciúma, no Morumbi, pelo Campeonato Brasileiro, e precisou passar por\n\numa cirurgia. O defensor vinha sendo um dos destaques do São Paulo na\n\ntemporada.\n\nNa avaliação do departamento médico, Rodrigo Caio deve\n\nser liberado para treinos com o elenco e jogos entre fevereiro e março. Com\n\nisso, é provável que seja inscrito pelo técnico Muricy Ramalho para disputar a\n\nfase de grupos da Taça Libertadores.",
MetaKeywords: "notícias, notícia, presidente prudente região",
CanonicalLink: "http://globoesporte.globo.com/sp/presidente-prudente-regiao/noticia/2014/12/rodrigo-caio-treina-ate-nas-ferias-e-tenta-acelerar-retorno-aos-gramados.html",
TopImage: "http://s.glbimg.com/es/ge/f/original/2014/12/26/10863872_894379987249341_2406060334390226774_o.jpg",
MetaLang: "pt",
}
article.Links = []string{
"http://globoesporte.globo.com/atleta/rodrigo-caio.html",
}
err := ValidateArticle(article, &[]string{"~~~REMOVED~~~"})
if err != nil {
t.Error(err)
}
}
func Test_HbrOrg(t *testing.T) {
article := Article{
Domain: "hbr.org",
Title: "Email Is the Best Way to Reach Millennials",
MetaDescription: "It’s still the channel with the highest ROI.",
CleanedText: "With $200 billion in annual buying power by 2017, Millennials have become every brand’s coveted customer. But what’s the best way to reach them?\n\nThe answer is email.\n\nFor all the talk of email being dead — Too much noise! Too much spam! Too many distractions! Snapchat! — email remains\u00a0the standard for digital communication. In fact, Millennials check email more than any other age group, and nearly half can’t even use the bathroom without checking it, according to a\u00a0recent Adobe study.\n\nThat same study\u00a0found nearly 98% of Millennials check their personal email at least every few hours at work, while almost 87% of Millennials check their work email outside of work.\n\nEmail is not only relevant for Millennials, it also happens to remain the channel where direct marketers get the highest ROI ($39 for every dollar spent, according to the Direct Marketing Association). But that doesn’t mean the same old email marketing will work on Millennials. Instead, marketers need to adjust, or run the risk of that dreaded swipe to the trash bin. Consider these ideas the next time you’re planning an email campaign and Millennials are a key part of the audience:\n\nMobile is a must. Millennials are more likely than any other age group to check email on smartphones, with 88% reporting that they regularly using a smartphone to check email. If you’re not mobile first, you’re not putting your Millennial customers first. Responsive design has been a mantra for some time, but if you’re not employing it, you’re alienating an important generation of consumers who live, breathe, and sleep with their mobile devices.\n\nTiming is everything. Looking at opens and clicks won’t get you anywhere without analyzing the day of week and time of day those emails are opened and clicked. For example, we found that Millennials are more likely than any other age group to check email while in bed (45.2%). Why not experiment with sending emails first thing in the morning or late in the evening with content relevant to that time of day?\n\nPictures are worth a thousand words. They’re also an important mechanism for Millennials to filter messages. Why send an email survey asking for written feedback when all you need to do is provide a choice between a smiley face and a frown? Images are an integral part of Millennial language, even in the workplace. A third of Millennials believe it is appropriate to use an emoji when communicating with a direct manager or senior executive, so it’s a safe bet they’re even more comfortable when it comes to emoji from brands. Millennials are thinking and communicating in images, so marketers need to optimize emails for images and allow for quick feedback through emoji.\n\nLess is more . Email marketing to Millennials isn’t about sending more of the same. Many Millennials want to see fewer emails (39%) and fewer repetitive emails from brands (32%). Marketers take note — stop spamming your lists and start marketing to individuals by understanding who they are first.\n\nNot every Millennial communicates the same way, of course. And digital communication is constantly evolving. Nonetheless, for now it seems safe to say that email is here to stay and will remain a critical channel even for reaching mobile customers. Just don’t expect the same old email tactics to work.",
MetaKeywords: "",
CanonicalLink: "",
TopImage: "https://hbr.org/resources/images/article_assets/2015/11/nov15-12-169799513-horz.jpg",
MetaLang: "en",
}
article.Links = []string{
"https://blogs.adobe.com/conversations/2015/08/email.html",
}
removed := []string{"~~~REMOVED~~~"}
err := ValidateArticle(article, &removed)
if err != nil {
t.Error(err)
}
}
func Test_HuffingtonPostCoUk(t *testing.T) {
article := Article{
Domain: "huffingtonpost.co.uk",
Title: "How We Are Controlling The Future Of TV Scheduling",
MetaDescription: "var isMobile = {\n Android: function() {\n return navigator.userAgent.match(/Android/i);\n },\n BlackBerry: function() {\n return navigator.userAgent.match(/BlackBerry/i);",
CleanedText: "Since its inception, television has been a unifying social force, bringing family, friends and different groups of people together. Even watching television on your own connects you to the multitudes of others watching the same thing across the globe.\n\nTV has come a long way: from black-and-white to colour, from a rare treat accessible to few to a household staple for everyone, from standard definition to tomorrow's ultra-HD screens.\n\nPerceptions of TV audiences have also changed over time. While theorists once believed TV viewers were passive, zombie-like figures transfixed in front of their televisions, numerous studies have proven that TV audiences are engaged, active and critical of the programmes they watch.\n\nIn the last several years, we've seen a dramatic shift that's placed viewers in control of their own scheduling. There's also more choice than ever before when it comes to accessing favourite programmes and watching them when and where they like.\n\n\"There are two simultaneous trends emerging when it comes to our TV watching habits, and they're two opposite trends, which is interesting,\" says Professor Sonia Livingstone OBE, a full professor in the Department of Media and Communications at the London School of Economics.\n\n\"One: we're watching TV on our laptops, tablets and phones, wherever and on whatever.\"\n\nAnd two, somewhat paradoxically, we're seeing a growth in the size of the screen in the living room. People talk about how everyone is watching TV on a 'small screen', but there's also a new viewing growing up around this enormous screen, as well as the more individualised viewing.\"\n\nNow, we watch shows wherever we want, whether it's relaxing in the bath with Corrie characters, catching up with a favourite drama on our phone during a morning commute or settling down in the sitting room every week to enjoy GBBO, gathered around the biggest 'and best' screen in the house. Equally, thanks to the latest in wearable tech, our most beloved television content has become a coveted accessory, accessible with a swipe on our watch.\n\nSubscription-free services like Freeview Play have also given us more options than ever before, with over 60 TV channels, 12 HD channels and over 25 radio stations a remote click away, plus the freedom to catch up on shows from the BBC, ITV, Channel 4 and Channel 5. Other services like Netflix and Amazon Prime also give us the opportunity to watch shows we missed the first time around - in one sitting, if we so desire! - while simultaneously introducing us to new and original programming.\n\n\"We keep fearing that people won't talk to each other anymore,\" says Professor Livingstone. \"There's the choice to watch separately and the choice to come together, whether it's binge viewing or the greater choice of programmes than ever before.\"\n\nAll of this choice has had a positive impact on TV consumers, according to Professor Livingstone.\n\n\"Most of the evidence is that people are feeling empowered and delighted. There's been an enormous welcome from people about the joys of having so much control and more choice than ever before.\"\n\nPeople are also prepared to pay to improve their television watching experience, whether that's spending on bigger HD screens or subscription services.\n\nWhile scheduling is fairly unimportant for younger generations, the middle-aged and young elderly population that remembers how television used to be is growing, so scheduling continues to play an important role for them.\n\nFor those younger generations, the definition of whether TV is 'a five minute clip of a beauty vlogger's latest haul on YouTube or a critically respected docudrama' calls into question what TV viewing really means these days.\n\n\"People have been saying for a while that scheduling is dead, but there's no getting rid of schedule for the 40s or 50-pluses who absolutely adhere to traditions of what to watch and when,\" says Professor Livingstone.\n\nRapidly emerging trends, like the increase in individual TV consumption across new tech and the importance of the living room big screen as the centrepoint of family life, ensure that the landscape of television scheduling is in constant flux and the future of television remains uncertain.\n\nOne thing we know? We'll still be watching.",
MetaKeywords: "changing, channels:, how, we, are, controlling, the, future, of, tv, scheduling, uk, entertainment",
CanonicalLink: "http://www.huffingtonpost.co.uk/2015/10/29/how-we-are-changing-the-future-of-tv-scheduling_n_8303736.html",
TopImage: "http://i.huffpost.com/gen/3507100/images/o-TELEVISION-REMOTE-CONTROL-facebook.jpg",
MetaLang: "en",
}
//article.Links = []string{""}
removed := []string{"~~~REMOVED~~~"}
err := ValidateArticle(article, &removed)
if err != nil {
t.Error(err)
}
}
func Test_HuffingtonPostJp(t *testing.T) {
article := Article{
Domain: "huffingtonpost.jp",
Title: "クロマグロ残り2匹 葛西臨海水族園の大量死は未だに原因不明",
MetaDescription: "クロマグロやカツオ類が大量死した問題で、葛西臨海水族園(東京都江戸川区)は3日、病理検査の結果、海の養殖魚を大量死させることで知られる2種類のウイルスが原因ではないことが確認されたと発表した。",
CleanedText: "",
MetaKeywords: "クロマグロ残り2匹 葛西臨海水族園の大量死は未だに原因不明, japan",
CanonicalLink: "http://www.huffingtonpost.jp/2015/03/03/tuna-death_n_6796602.html",
TopImage: "http://i.huffpost.com/gen/2678692/images/o-TUNA-DEATH-facebook.jpg",
MetaLang: "ja",
}
//article.Links = []string{""}
removed := []string{"~~~REMOVED~~~"}
err := ValidateArticle(article, &removed)
if err != nil {
t.Error(err)
}
}
func Test_IncCom(t *testing.T) {
article := Article{
Domain: "inc.com",
Title: "Why 2015 Was Rent the Runway's Biggest Year So Far",
MetaDescription: "A new business model, brick-and-mortar stores, and $70 million in venture capital funding. Here's how this business lit up runways (and sidewalks) in 2015.",
CleanedText: "p\n\nNewsletters\n\nFollow\n\np\n\nInc. Wire\n\nStartup\n\nGrow\n\nAsk Marcus Lemonis\n\nMoney\n\nGrowth Strategies\n\nLead\n\nInnovate\n\nInc. Events & Offers\n\nInc. Partner Events & Offers\n\nForgot Password?\n\nEnter your email to reset your password\n\nOr sign up using:\n\nNew member? Sign up now.\n\nSign in if you're already registered.\n\np\n\nA new business model, brick-and-mortar stores, and $70 million in venture capital funding. Here's how this business lit up runways (and sidewalks) in 2015.\n\nWRITE A COMMENT\n\nRECOMMENDED\n\nMark Cuban: What I Would Do If I Were President\n\nSamuel Adams Creator Jim Koch on Scaling up, One Barrel at a Time\n\nWhy America Needs a CEO in the White House\n\n2 Traits That Give Veterans an Entrepreneurial Advantage\n\n3 Key Traits Shared by the Most Successful Business Leaders\n\nWhy Startups Need to Be Able to Survive Without Their Founders\n\nRussell Simmons: Why It's Important to Do What You Love\n\nMark Cuban: How You'll Know You're Ready to Launch\n\nThe 4 Mentors Every Entrepreneur Needs\n\nDaymond John: 5 Traits That Make a Good Business Leader\n\nHow Marcus Lemonis Knows If You're Making Good Money\n\nArianna Huffington: The Wake-Up Call That Helped Arianna Huffington Learn to Thrive\n\nThe Making of Inc.'s Jessica Alba Cover Story\n\nSecrets of Wealth and Success From Tony Robbins\n\nBarbara Corcoran's 8 Lessons for Entrepreneurs\n\nMint Founder: How to Learn From Your Early Mistakes\n\nOne Nightly Productivity Tip to Get the Most out of Your Day\n\nHow to Keep the Fear of Failure From Stalling Personal Growth\n\nWhy Entrepreneurship Is a 24/7 Lifestyle\n\nWhy the Only Guaranteed Path to Success Is Through Hard Work and Hustle\n\nAdvertisement",
MetaKeywords: "",
CanonicalLink: "http://www.inc.com/zoe-henry/rent-the-runway-2015-company-of-the-year-nominee.html",
TopImage: "http://www.inc.com/uploaded_files/image/970x450/OUT63313304-web_70674.jpg",
MetaLang: "en",
}
article.Links = []string{
"http://www.inc.com/",
"https://magazine.inc.com/servlet/ConvertibleGateway?cds_mag_code=ICM&cds_page_id=136768&cds_response_key=XB5KNNGF1",
"https://www.facebook.com/Inc",
"https://twitter.com/inc",
"https://www.linkedin.com/company/inc--magazine",
"https://plus.google.com/+incmagazine",
"https://www.pinterest.com/incmagazine/",
"http://www.youtube.com/user/incmagazine?sub_confirmation=1",
"https://instagram.com/incmagazine",
"https://flipboard.com/@incmagazine",
"http://www.inc.com/mark-cuban/what-i-would-do-if-i-were-president.html",
"http://www.inc.com/jim-koch/samuel-adams-creator-on-scaling-up-one-barrel-at-a-time.html",
"http://www.inc.com/donny-deutsch/why-america-needs-a-ceo-in-the-white-house.html",
"http://www.inc.com/norm-brodsky/2-traits-that-give-veterans-a-leg-up-as-entrepreneurs.html",
"http://www.inc.com/donny-deutsch/3-key-traits-shared-by-the-most-successful-business-leaders.html",
"http://www.inc.com/gary-vaynerchuk/askgaryvee-episode-84-surviving-without-a-founder.html",
"http://www.inc.com/russell-simmons/why-its-important-to-do-what-you-love.html",
"http://www.inc.com/mark-cuban/how-youll-know-youre-ready-to-launch.html",
"http://www.inc.com/kim-kaupe/4-mentors-that-every-entrepreneur-needs.html",
"http://www.inc.com/daymond-john/5-traits-that-make-a-good-business-leader.html",
"http://www.inc.com/marcus-lemonis-bees-knees-spicy-honey.html",
"http://www.inc.com/arianna-huffington/founders-forum-how-huffington-learned-to-thrive.html",
"http://www.inc.com/jessica-alba/the-making-of-inc-jessica-alba-cover-story.html",
"http://www.inc.com/tony-robbins/tony-robbins-reveals-his-secrets-on-wealth-success-and-financial-freedom.html",
"http://www.inc.com/barbara-corcoran/eight-lessons-for-entrepreneurs.html",
"http://www.inc.com/aaron-patzer/how-to-learn-from-early-mistakes.html",
"http://www.inc.com/adam-miller/one-nightly-productivity-tip-to-get-the-most-out-of-your-day.html",
"http://www.inc.com/jen-groover/how-to-keep-failure-away-from-personal-growth.html",
"http://www.inc.com/ravin-gandhi/why-entrepreneurship-is-a-lifestyle.html",
"http://www.inc.com/gary-vaynerchuk/askgaryvee-episode-86-hard-work-and-hustle.html",
}
removed := []string{"~~~REMOVED~~~"}
err := ValidateArticle(article, &removed)
if err != nil {
t.Error(err)
}
}
func Test_kacherenCom(t *testing.T) {
article := Article{
Domain: "kacheren.com",
Title: "盛夏特惠席卷!长安跨越购车大促销!",
MetaDescription: "盛夏特惠席卷!长安跨越购车大促销! ,卡车人论坛",
CleanedText: "",
MetaKeywords: "盛夏特惠席卷!长安跨越购车大促销!",
CanonicalLink: "http://www.kacheren.com/bbs/read-htm-tid-143983-page-1.html",
TopImage: "http://www.kacheren.com/bbs/static/image/common/back_footer.png",
MetaLang: "zh",
}
//article.Links = []string{}
removed := []string{"~~~REMOVED~~~"}
err := ValidateArticle(article, &removed)
if err != nil {
t.Error(err)
}
}
func Test_LinkedinCom(t *testing.T) {
article := Article{
Domain: "linkedin.com",
Title: "An Unexpected Way to Achieve a Better Work-Life Balance",
MetaDescription: "Work-life balance. Everyone talks about it. And everyone struggles to achieve it. Yet finding a reasonable work-life balance is easier than you think",
CleanedText: "Work-life balance. Everyone talks about it. And everyone struggles to achieve it.\n\nYet finding a reasonable work-life balance is easier than you think. While it's true the equilibrium point is constantly shifting, most of the same attitudes, perspectives, and skills apply to both \"work\" and \"life.\"\n\nSo why not take advantage of that fact? Pick the right \"life\" pursuits and they inform and enhance your professional skills -- and add a healthy dose of perspective and humility along the way.\n\nIn my case I like to take on extremely difficult (at least for me) physical goals. (Granted my approach to goal achievement in general is a little unconventional. Just like\u00a0Fight Club,\u00a0the first rule of achieving a goal is\u00a0you don't talk about achieving that goal. And achieving a goal has a lot less to do with the goal itself and\u00a0a lot more to do with the routine you develop\u00a0to support that goal.)\n\nSo a few years ago, after just four months of training, I rode the\u00a0Alpine Loop Gran Fondo, a 92-mile, four-mountain ride that included 11,000 feet of climbing. (Those four months felt like a lifetime, though, since pro mountain biker Jeremiah Bishop trained me. But then again I never could have been ready without him.)\n\nAfter a few years of cycling I got tired of being cycling skinny -- 6' tall, 150 lbs is not a particularly good look -- and decided to see if I could pull off some semblance of the\u00a0\"movie star becomes an action hero\"\u00a0physical transformation. I gained over 20 pounds, lost a few percentage points of body fat, and got a lot stronger. (That training sucked too, since\u00a0Jeffrey Del Favero\u00a0of\u00a0Bodybuilding.com\u00a0created my program, but then again I never could have done it without him.)\n\nSo why do I do take on (feel free to insert your own adjective) personal challenges? And how does that help me professionally? It's all about the habits, skills, and perspectives gained. Here are some reasons.\n\nSuccess is ultimately based on numbers. Sure, you can try to \"hack\" a goal. Sure, you can look for shortcuts. (People have\u00a0built entire careers\u00a0off the premise.) But eventually achieving a huge goal is all about volume and repetition.\n\nWant to eventually ride a tough gran fondo? You'll have to ride hundreds of miles along the way. Want to go from only being able to do three pull-ups to eventually being able to do four sets of twenty? You'll have to lift a ton of weight along the way.\n\nThe same is true for professional success; it's largely based on doing the work. Want twenty new customers? Expect to cold call two or three hundred prospects. Want to hire a superstar? Expect to screen dozens and then interview ten or fifteen people.\n\nThe surest path to success is to do an incredible amount of work. If you're willing to do the work, you can succeed at almost anything.\n\nThe armor that protects us eventually destroys us. We all wear armor. That armor protects us but also, over time, wears us down.\n\nOur armor is primarily forged by success. Every accomplishment adds an additional layer of protection from vulnerability. In fact, when we feel particularly insecure we unconsciously strap on more armor so we feel less vulnerable:\n\nArmor protects when we're unsure, tentative, or at a perceived disadvantage. Our armor says, \"That's okay; I may not be good at this... but I'm really good at\u00a0that.\"\n\nOver time armor also encourages us to narrow our focus to our strengths. That way we stay safe. The more armor we put on the more we can hide our weaknesses and failings--from others and from ourselves.\n\nWe use our armor all the time. I use my armor all the time--I feel sure more than you. But I get really tired of wearing it.\n\nWhen I ride a bike the guy who passes me doesn't care if I've ghostwritten bestsellers or drive a fancy car or live in a nice neighborhood. At the gym, the guy who lifts more than me also doesn't care about any of that stuff. He's stronger and fitter than me. Period.\n\nIn those situations no amount of armor, real or imagined, can protect me. I'm just a guy on a bike. I'm just a guy at the gym. I'm just me.\n\nBeing just me is pretty scary.\n\nBut being who you really are is something we all need to do more often. It keeps things in perspective. It reminds us that we can always be better. It reminds us that no matter how good we think we are at something there is always someone who is a lot better.\n\nAnd that's not depressing -- that's motivating.\n\nGrace is an awesome feeling -- one we can never experience enough. Outstanding athletes exist in a state of grace, a place where calculation and strategy and movement happen almost unconsciously. Great athletes can focus in a way that, to us, is unrecognizable because through skill, training, and experience their ability to focus is nearly effortless.\n\nWe've all felt a sense of grace, if only for a few precious moments, when we performed better than we ever imagined possible... and realized what we assumed to be limits weren't really limits at all.\n\nThose moments don't happen by accident, though. Grace is never given; grace must be earned through discipline and training and sacrifice.\n\nI want to ride up a mountain and experience the feeling that I can climb and climb and climb and I don't have to think about anything because I can just\u00a0go....\n\nI want to struggle with a weight and experience the feeling that I can do a few more reps because I know, without a doubt, I always have a little more in me...\n\nAnd I want to sometimes write almost effortlessly and without thinking because years of effort and practice have brought me to a place where occasionally I am the writer I would like to be...\n\nAll those are moments of grace. They're awesome. They're amazing.\n\nAnd they feed off each other because the confidence you build after experiencing a moment of grace in one pursuit helps you keep pushing when the going gets tough in other pursuits.\n\nWith work, \"then\" is always better than \"now.\"\u00a0 \"Now\" and \"then\" are wonderful words when they appear in the same sentence.\n\nWhen you work to improve at something -- especially in the beginning stages -- \"now\" is often a terrible place. At one point my \"now\" was riding like an asthmatic hippo. At one point my \"now\" was doing four dips and feeling like I was tearing my chest apart.\n\nBut with time and effort my \"now\" was transformed. I could ride\u00a0with more speed, power, and confidence. I could do\u00a0sets of ten, then twenty, then thirty dips. I was able to look back with satisfaction at a \"now\" I had transformed into a vastly inferior \"then.\"\n\nThink about something you wanted to do. Then think about where you would be\u00a0now\u00a0if you had actually gotten started on it\u00a0then.\n\nWhen you do the work, then always pales in comparison to now: family, business, and every aspect of your life. When you don't do the work, now is just like then -- except now you also get to live with regret.\n\nQuitting is a habit anyone can learn to break. We're all busy. Each of us face multiple, ongoing demands. Every day we are forced a number of times to say, \"That's not perfect, but it works... and I need to move on to something else.\"\n\nStopping short of excellence is something we are not just forced to do but are also\u00a0trained\u00a0to do. Most of the time we have no choice so we get really good at \"quitting.\"\n\nI'm really good at quitting. I raised wonderful kids and did a good job... but I know I could have done more. I've built a decent business... but I know I could have done more. I've tackled challenges before and tried really hard... but I know I could have done more.\n\nWhere physical challenges are concerned there are hundreds if not thousands of times I want to quit. Training is hard and only gets harder. Balancing family and work and everything else is hard and only gets harder.\n\nAt weak moments, struggle shatters our resolve and make us want to quit.\n\nIt's hard not to stop, by choice or otherwise, at \"good enough.\" But sometimes, if the goal is big enough, we have to be\u00a0great: not great compared to other people... but great compared to ourselves.\n\nThat comparison is the only comparison that really matters and is the best reason of all to try to accomplish more than you -- or anyone around you -- ever thought possible.\n\nWhen you succeed, you become something you were not. And then you get to do it again, and become\u00a0something else you once were not -- but definitely are now.\n\nI also write for Inc.com:\n\nCheck out my book of personal and professional advice,\u00a0TransForm: Dramatically Improve Your Career, Business, Relationships, and Life -- One Simple Step At a Time. (PDF version here,\u00a0Kindle version here,\u00a0Nook version here.)\n\nIf after 10 minutes you don't find at least 5 things you can do to make your life better I'll refund your money.\n\nThat way you have nothing to lose... and everything to gain.",
MetaKeywords: "",
CanonicalLink: "https://www.linkedin.com/pulse/unexpected-way-achieve-better-work-life-balance-jeff-haden",
TopImage: "http://m.c.lnkd.licdn.com/mpr/mpr/AAEAAQAAAAAAAATuAAAAJGRiODU4MjBjLTFlZTEtNGQ3NS05ZDk1LTZiNjVkYjE5NWZlNA.jpg",
MetaLang: "en",
}
article.Links = []string{
"http://www.inc.com/jeff-haden/silence-the-surprising-way-to-achieve-a-goal.html",
"http://www.inc.com/jeff-haden/an-nearly-foolproof-way-to-achieve-every-goal-you-set-wed.html",
"http://www.alpineloopgranfondo.com/",
"http://www.huffingtonpost.com/2014/12/01/jake-gyllenhaal-southpaw_n_6251010.html",
"https://www.linkedin.com/pub/jeffrey-del-favero/23/b5a/a15",
"http://www.bodybuilding.com/",
"http://fourhourworkweek.com/blog/",
"http://www.inc.com/author/jeff-haden",
"https://gumroad.com/l/YHadh",
"https://gumroad.com/l/YHadh",
"http://amzn.to/1EiaVXV",
"http://www.barnesandnoble.com/w/books/1121702502?ean=2940151263917",
}
removed := []string{"~~~REMOVED~~~"}
err := ValidateArticle(article, &removed)
if err != nil {
t.Error(err)
}
}
func Test_NavatelanganaCom(t *testing.T) {
article := Article{
Domain: "navatelangana.com",
Title: "బ్రహ్మోస్ క్షిపణిని ప్రయోగం సక్సెస్!",
MetaDescription: "",
CleanedText: "",
MetaKeywords: "",
CanonicalLink: "",
TopImage: "",
MetaLang: "te", // Telegu (India)
}
//article.Links = []string{""}
removed := []string{"~~~REMOVED~~~"}
err := ValidateArticle(article, &removed)
if err != nil {
t.Error(err)
}
}
/*
func Test_Newschannel20Com(t *testing.T) {
article := Article{
Domain: "newschannel20.com",
Title: "\"Red flag\" gun bill passes house",
MetaDescription: "A new bill would allow law enforcement to temporarily take guns from people reported as a threat by family or friends. The bill has just passed the House and is now on its way to the senate.The Lethal Violence Order of Protection Act says if someone is giv",
CleanedText: "",
MetaKeywords: "Doug Schmidgall,Senate,Kathleen Willis,Law Enforcement,Social Media Posts,Judge",
CanonicalLink: "http://newschannel20.com/news/local/red-flag-gun-bill-passes-house",
TopImage: "",
MetaLang: "en", // currently recognised as "tpi"
}
//article.Links = []string{""}
removed := []string{"~~~REMOVED~~~"}
err := ValidateArticle(article, &removed)
if err != nil {
t.Error(err)
}
}
*/
func Test_NewsDirectrixRu(t *testing.T) {
article := Article{
Domain: "news.directrix.ru",
// NB: the HTML page declares the wrong charset (windows-1251 instead of UTF-8, so when converting to UTF-8 the russian characters are mangled)
Title: "РћРґРЅРѕРј РёР· парков РІ Багдаде произошеР", //"Одном из парков в Багдаде произошел взрыв, погибли семь человек, обзоры сми на бизнес-портале Directrix.ru",
MetaDescription: "РћРґРЅРѕРј РёР· парков РІ Багдаде произошел взрыв, погибли семь человек - новости РёР· категории Р’ РјРёСЂРµ.", //Одном из парков в Багдаде произошел взрыв, погибли семь человек - новости из категории В мире.",
CleanedText: "",
MetaKeywords: "РћРґРЅРѕРј РёР· парков РІ Багдаде произошел взрыв, погибли семь человек, новости Р’ РјРёСЂРµ", //"Одном из парков в Багдаде произошел взрыв, погибли семь человек, новости В мире",
CanonicalLink: "http://news.directrix.ru/odnom_iz_parkov_v_bagdade_proizoshel_vzryv_pogibli_sem_chelovek.html",
TopImage: "http://www.findnews.ru/news/img/8962334_0.jpg",
MetaLang: "ru",
}
//article.Links = []string{""}
removed := []string{"~~~REMOVED~~~"}
err := ValidateArticle(article, &removed)
if err != nil {
t.Error(err)
}
}
func Test_NyTimesCom(t *testing.T) {
article := Article{
Domain: "nytimes.com",
Title: "How Gun Traffickers Get Around State Gun Laws",
MetaDescription: "The effect of state gun control laws is diluted by a thriving underground market for firearms brought from states with few restrictions.",
CleanedText: "Wash.\n\nWhere guns used in crimes came from\n\nMe.\n\nArrow sizes show the number of guns traced to other states in 2014\n\nMont.\n\nN.D.\n\nMinn.\n\nVt.\n\nOre.\n\nN.H.\n\nIdaho\n\nN.Y.\n\nWis.\n\nS.D.\n\nMass.\n\nMich.\n\nR.I.\n\nWyo.\n\nConn.\n\nPa.\n\nIowa\n\nN.J.\n\nNeb.\n\nNev.\n\nMore than two-thirds of guns connected to crimes in New York and New Jersey were brought in from other states, mostly from the South.\n\nMd.\n\nOhio\n\nDel.\n\nUtah\n\nIll.\n\nW.Va.\n\nColo.\n\nD.C.\n\nInd.\n\nVa.\n\nKan.\n\nMo.\n\nCalif.\n\nKy.\n\nN.C.\n\nTenn.\n\nOkla.\n\n1,184 guns\n\nfrom arizona\n\nN.M.\n\nArk.\n\nS.C.\n\nAriz.\n\nGa.\n\nAla.\n\nMiss.\n\nCriminals in California used about 6,000 guns from other states, mainly from those with few gun-buying restrictions like Arizona and Nevada.\n\nLa.\n\nTexas\n\nCrime rings smuggle guns from Orlando, Fla., to Puerto Rico.\n\nFla.\n\n349 guns\n\nfrom florida\n\nState gun control laws\n\nLENIENT\n\nstrict\n\nPuerto Rico\n\nWhere guns used in crimes came from\n\nArrow sizes show the number of guns traced to other states in 2014\n\nOre.\n\nN.Y.\n\nN.J.\n\nNev.\n\nIllinois\n\nIndiana\n\nCalif.\n\nN.C.\n\nAriz.\n\nS.C.\n\n1,184 guns\n\nfrom arizona\n\nGa.\n\nTexas\n\nFlorida\n\nState gun control laws\n\n349 guns\n\nfrom florida\n\nLENIENT\n\nstrict\n\nPuerto Rico\n\nWhere guns used in crimes came from\n\nWash.\n\nArrow sizes show the number of guns traced to other states in 2014\n\nMe.\n\nMont.\n\nN.D.\n\nMinn.\n\nVt.\n\nOre.\n\nN.H.\n\nIdaho\n\nN.Y.\n\nWis.\n\nS.D.\n\nMass.\n\nMich.\n\nWyo.\n\nPa.\n\nIowa\n\nN.J.\n\nNeb.\n\nNev.\n\nMd.\n\nOhio\n\nUtah\n\nMost guns connected to crimes in New York and New Jersey were brought in from other states, mostly from the South.\n\nIll.\n\nW.Va.\n\nColo.\n\nD.C.\n\nInd.\n\nVa.\n\nKan.\n\nMo.\n\nCalif.\n\nKy.\n\nN.C.\n\nTenn.\n\n1,184\n\nguns from arizona\n\nOkla.\n\nN.M.\n\nArk.\n\nS.C.\n\nAriz.\n\nGa.\n\nAla.\n\nMiss.\n\nLa.\n\nTexas\n\nCriminals in California used about 6,000 guns from other states, mainly from those with few gun-buying restrictions like Arizona and Nevada.\n\n349 guns\n\nfroM\n\nflorida\n\nFla.\n\nState gun control laws\n\nCrime rings smuggle guns from Orlando, Fla., to Puerto Rico.\n\nPuerto Rico\n\nLENIENT\n\nstrict\n\nWhere guns used in crimes came from\n\nWashington\n\nArrow sizes show the number of guns traced to other states in 2014\n\nMe.\n\nMontana\n\nNorth Dakota\n\nMinnesota\n\nVt.\n\nOregon\n\nN.H.\n\nIdaho\n\nNew York\n\nWisconsin\n\nSouth Dakota\n\nMass.\n\nMichigan\n\nR.I.\n\nWyoming\n\nConn.\n\nPa.\n\nIowa\n\nNew Jersey\n\nNeb.\n\nNevada\n\nMd.\n\nOhio\n\nMore than two-thirds of guns connected to crimes in New York and New Jersey were brought in from other states, mostly from the South.\n\nDel.\n\nUtah\n\nIllinois\n\nW.Va.\n\nColorado\n\nD.C.\n\nIndiana\n\nVa.\n\nKansas\n\nMo.\n\nCalifornia\n\nKy.\n\nN.C.\n\nTenn.\n\nOklahoma\n\n1,184 guns\n\nfrom arizona\n\nNew Mexico\n\nArkansas\n\nS.C.\n\nArizona\n\nGeorgia\n\nAlabama\n\nMiss.\n\nCriminals in California used about 6,000 guns from other states, mainly from those with few gun-buying restrictions like Arizona and Nevada.\n\nTexas\n\nCrime rings smuggle guns from Orlando, Fla., to Puerto Rico.\n\nLouisiana\n\nFlorida\n\nState gun control laws\n\n349 guns\n\nfrom florida\n\nLENIENT\n\nstrict\n\nPuerto Rico\n\nIn California, some gun smugglers use FedEx. In Chicago, smugglers drive just across the state line into Indiana, buy a gun and drive back. In Orlando, Fla., smugglers have been known to fill a $500 car with guns and send it on a ship to crime rings in Puerto Rico.\n\nIn response to mass shootings in the last few years, more than 20 states, including some of the nation’s biggest, have passed new laws restricting how people can buy and carry guns. Yet the effect of those laws has been significantly diluted by a thriving underground market for firearms brought from states with few restrictions.\n\nAbout 50,000 guns are found to be diverted to criminals across state lines every year, federal data shows, and many more are likely to cross state lines undetected.\n\nIn New York and New Jersey, which have some of the strictest laws in the country, more than two-thirds of guns tied to criminal activity were traced to out-of-state purchases in 2014. Many were brought in via the so-called Iron Pipeline, made up of Interstate 95 and its tributary highways, from Southern states with weaker gun laws, like Virginia, Georgia and Florida.\n\nNew York\n\nThe Iron Pipeline\n\nPa.\n\nGuns used in recent shootings of New York City police officers were traced to pawn shops in Georgia.\n\nJONESBORO\n\nVa.\n\nNew Jersey\n\n386 guns\n\nN.C.\n\nPERRY\n\nGa.\n\nS.C.\n\nMany guns used in crimes are brought to New York and New Jersey along Interstate 95. In recent years, more guns have started coming from Pennsylvania gun shows, a federal official said.\n\n292 guns\n\nFla.\n\nNew York\n\nPa.\n\nNew\n\nJersey\n\nGuns used in recent shootings of New York City police officers were traced to pawn shops in Georgia.\n\nVa.\n\nN.C.\n\nS.C.\n\nJONESBORO\n\n386\n\nguns\n\nThe Iron Pipeline\n\nPERRY\n\nMany guns used in crimes are brought to New York and New Jersey along Interstate 95. In recent years, more guns have started coming from Pennsylvania gun shows, a federal official said.\n\nGa.\n\n292\n\nguns\n\nFla.\n\nA handgun used in the killing of two Brooklyn officers last year was traced to a pawnshop just south of Atlanta. A revolver used in a fatal shooting of an officer in Queens in May was traced to a roadside pawnshop, also in Georgia, about 100 miles from Atlanta. And a handgun used to kill an officer in East Harlem last month was traced to South Carolina.\n\n“We’re trying to deal with it, but we have a spigot that’s wide open down there and we don’t have a national or local ability to shut that spigot down at the moment,” said the New York City police commissioner, William J. Bratton, as he announced an indictment against gun traffickers last week.\n\nNew York Police Department, via Getty Images\n\nNew York Police Department\n\nTwo guns used in killings of New York City police officers were traced to pawnshops in Georgia.\n\nThe economics are straightforward: A low-quality handgun that sells for $100 in an Atlanta store might sell for $500 or $600 in New York City, researchers say — and it can be transported cheaply. By contrast, the majority of guns used in crimes in Texas, Georgia and other states with more lenient gun laws are purchased in-state.\n\nThe New York Times examined gun trafficking by analyzing nine years of data compiled by the Bureau of Alcohol, Tobacco, Firearms and Explosives, as well as an index of state gun laws developed by researchers at Johns Hopkins University.\n\nLaw enforcement officials express frequent frustration that they are not able to track every gun that crosses state lines, which means the estimates here are conservative. When the police do recover a gun tied to criminal activity, typically after an arrest, they can trace the gun to where it was last sold through a federally licensed dealer.\n\nChicago offers perhaps the starkest example of trafficking. There are no retail gun dealers within city limits, because Chicago has some of the tightest municipal gun regulations. Yet bringing a gun into Chicago can be as simple as driving less than an hour to a gun show in Indiana, where private sales are not recorded and do not require a background check.\n\n“If you’re in the city of Chicago on the South Side, you may be closer to Indiana than you are to the Magnificent Mile,” said Roseanna Ander, executive director of the University of Chicago Crime Lab, referring to a well-known part of Chicago’s downtown.\n\nThe Route Into Chicago\n\nWisconsin\n\nMost guns used in crimes in Illinois were recovered in the Chicago area.\n\nMichigan\n\nIowa\n\nCHICAGO\n\n1,041 guns\n\nIllinois\n\nGun shows in Indiana are a frequent source for guns used in crimes in Illinois.\n\nIndiana\n\nMissouri\n\nMany people in Illinois have family ties to Mississippi, the second most common source for crime guns.\n\nThe Route Into Chicago\n\nMost guns used in crimes in Illinois were recovered in the Chicago area.\n\nWisconsin\n\nIowa\n\nCHICAGO\n\n1,041\n\nguns\n\nIllinois\n\nIndiana\n\nGun shows in Indiana are a frequent source for guns used in crimes in Illinois.\n\nMany people in Illinois have family ties to Mississippi, the second most common source for crime guns.\n\nMissouri\n\nMany guns follow a complex path from the original sale to the underground market. Most guns are originally bought from retail stores, but people who can’t pass a background check typically obtain guns from friends, family or illegal dealers.\n\nAccording to an anonymous survey of inmates in Cook County, Ill., covering 135 guns they had access to, only two had been purchased directly from a gun store. Many inmates reported obtaining guns from friends who had bought them legally and then reported them stolen, or from locals who had brought the guns from out of state.\n\nOne inmate said, “Some people get on a train and bring them back, can be up to five or six guns, depending on how much risk they want to take.”\n\nSome larger traffickers use more elaborate techniques. Buying a gun in Puerto Rico requires an expensive permit and a lengthy application process, but Florida has no such restrictions. Traffickers in Orlando tied to organized gangs in Puerto Rico send guns in the mail, through FedEx, or even encased in cars that travel by ship to the island.\n\n“They’ll buy a $500 car and stuff it with as many guns as possible,” said Carlos Gonzalez, an agent with the Miami division of the Bureau of Alcohol, Tobacco, Firearms and Explosives.\n\nGuns by Mail\n\nOrlando, which has a large Puerto Rican population, is the source for many guns trafficked to Puerto Rico.\n\nORLANDO\n\nFlorida\n\nMIAMI\n\n349 guns\n\nIn 2014, more guns used in crimes in Puerto Rico were traced to purchases in Florida than on the island itself.\n\nCuba\n\nPuerto\n\nRico\n\nHaiti\n\nDom.\n\nRep.\n\nGuns by Mail\n\nOrlando, which has a large Puerto Rican population, is the source for many guns trafficked to Puerto Rico.\n\nORLANDO\n\nFlorida\n\nMIAMI\n\n349\n\nguns\n\nCuba\n\nHaiti\n\nDom.\n\nRep.\n\nIn 2014, more guns used in crimes in Puerto Rico were traced to purchases in Florida than on the island itself.\n\nPuerto\n\nRico\n\nFederal agents and postal inspectors have caught some traffickers, leading to modified techniques, such as shipping guns in newer, more expensive cars or mailing guns from Jacksonville, Fla., instead of Orlando. Stopping such smuggling is logistically hard. “If the U.S. Postal Service were to screen every single package that entered into Puerto Rico, it would bring the economy to a halt,” Mr. Gonzalez said.\n\nMost gun trafficking patterns have remained remarkably constant over time. But some researchers point to a significant shift in Missouri as evidence that changes to one state’s laws can have broad implications.\n\nBefore 2007, Missouri required gun buyers to get a state permit and to undergo background checks on private sales, two restrictions strongly associated with states that provide fewer guns to interstate traffickers, according to research by Daniel Webster, director of the Johns Hopkins Center for Gun Policy and Research. At the time, nearly half of the guns used in crimes and recovered in Missouri were traced to other states, largely from neighboring Kansas and Illinois.\n\nBut when Missouri relaxed its gun control laws in 2007, the flow started to change. The number of guns traced to other states decreased, while the number of guns from within Missouri increased to nearly three-quarters.\n\nSource of guns used in crimes in Missouri\n\n80 percent\n\nGuns from Missouri\n\n74%\n\n60\n\nMore criminals used guns from Missouri after guns became easier to purchase.\n\n40\n\nMissouri repealed strict gun control laws\n\nin August 2007.\n\n26%\n\n20\n\nGuns imported from other states\n\n2014\n\n’12\n\n’10\n\n’08\n\n’06\n\n’04\n\n2002\n\nSource of guns used in crimes in Missouri\n\n80 percent\n\nGuns from Missouri\n\n74%\n\n60\n\nMore criminals used guns from Missouri after guns became easier to purchase.\n\n40\n\nMissouri repealed strict gun control laws in August 2007.\n\n26%\n\nGuns imported from other states\n\n20\n\n2014\n\n’12\n\n’10\n\n’08\n\n’06\n\n’04\n\n’02",
MetaKeywords: "Gun Control,Attacks on Police,Firearms,Bureau of Alcohol Tobacco and Firearms",
CanonicalLink: "http://www.nytimes.com/interactive/2015/11/12/us/gun-traffickers-smuggling-state-gun-laws.html",
TopImage: "http://static01.nyt.com/images/2015/11/12/us/gun-traffickers-smuggling-state-gun-laws-1447372488027/gun-traffickers-smuggling-state-gun-laws-1447372488027-articleLarge-v4.png",
MetaLang: "en",
}
article.Links = []string{
"http://www.motherjones.com/politics/2013/12/state-gun-laws-after-newtown",
"https://www.atf.gov/resource-center/data-statistics",
"http://www.nytimes.com/2014/12/25/nyregion/tracing-the-gun-used-to-kill-2-new-york-city-police-officers.html",
"http://www.nytimes.com/2015/05/06/nyregion/guns-from-georgia-are-linked-to-another-new-york-officers-death.html",
"http://www.nytimes.com/2015/10/27/nyregion/gun-fished-from-harlem-river-is-linked-to-officers-killing.html",
"https://crimelab.uchicago.edu/",
"http://www.sciencedirect.com/science/article/pii/S0091743515001486",
"http://www.jhsph.edu/research/centers-and-institutes/johns-hopkins-center-for-gun-policy-and-research/",
}
removed := []string{"~~~REMOVED~~~"}
err := ValidateArticle(article, &removed)
if err != nil {
t.Error(err)
}
}
func Test_PostFacebookCom(t *testing.T) {
article := Article{
Domain: "post.facebook.com",
Title: "Science - Spewings from Earth’s deep mantle reveal clues...",
MetaDescription: "Spewings from Earth’s deep mantle reveal clues into the origin of our planet’s water. These findings serve as evidence for primordial water on Earth and...",
CleanedText: "Cookies help us to provide, protect and improve Facebook's services. By continuing to use our site, you agree to our cookie policy.",
MetaKeywords: "",
CanonicalLink: "",
TopImage: "",
MetaLang: "en",
}
article.Links = []string{
"https://www.facebook.com/help/cookies?fref=cub",
}
removed := []string{"~~~REMOVED~~~"}
err := ValidateArticle(article, &removed)
if err != nil {
t.Error(err)