diff --git a/lib/Data/Validate/Sanctions/Fetcher.pm b/lib/Data/Validate/Sanctions/Fetcher.pm index 4e08e781..38909074 100644 --- a/lib/Data/Validate/Sanctions/Fetcher.pm +++ b/lib/Data/Validate/Sanctions/Fetcher.pm @@ -14,7 +14,6 @@ use Syntax::Keyword::Try; use XML::Fast; use Locale::Country; - use constant MAX_REDIRECTS => 3; # VERSION @@ -83,6 +82,11 @@ sub config { url => $eu_url, parser => \&_eu_xml, }, + 'UNSC-Sanctions' => { + description => 'UN: United Nations Security Council Consolidated List', + url => $args{unsc_url} || 'https://scsanctions.un.org/resources/xml/en/consolidated.xml', + parser => \&_unsc_xml, + }, }; } @@ -425,6 +429,129 @@ sub _eu_xml { }; }
+=head2 _unsc_xml
+
+Parses the United Nations Security Council consolidated list (XML text) into sanction entries.
+
+Takes the raw XML content. Only the entries under C<INDIVIDUALS> are read.
+
+Returns a hash-ref with two keys:
+
+=over 4
+
+=item * C<updated> - publication time, converted from the C<dateGenerated> attribute by C<_date_to_epoch>
+
+=item * C<content> - array-ref handed to C<_process_sanction_entry> once per individual
+
+=back
+
+Dies when the release date is missing.
+
+=cut
+
+sub _unsc_xml {
+    my ($xml_content) = @_;
+
+    # Escape bare ampersands so that the parser receives well-formed XML. An '&'
+    # that already starts a predefined entity or a numeric character reference
+    # is left untouched.
+    $xml_content =~ s/&(?!(?:amp|lt|gt|quot|apos|#[0-9]+|#x[0-9A-Fa-f]+);)/&amp;/g;
+
+    # These elements are requested as array-refs, even when they occur once.
+    my @repeatable = qw(
+        INDIVIDUAL
+        INDIVIDUAL_ALIAS
+        INDIVIDUAL_ADDRESS
+        INDIVIDUAL_DATE_OF_BIRTH
+        INDIVIDUAL_PLACE_OF_BIRTH
+        INDIVIDUAL_DOCUMENT
+    );
+    my $data = xml2hash($xml_content, array => \@repeatable)->{CONSOLIDATED_LIST};
+    $data = {} unless ref($data) eq 'HASH';
+
+    # The release date is the dateGenerated attribute of the root element.
+    my $date_generated = $data->{'-dateGenerated'};
+    die "Corrupt data. Release date is missing\n" unless $date_generated;
+
+    my $publish_epoch = _date_to_epoch($date_generated);
+
+    my $dataset = [];
+
+    for my $individual (_unsc_children($data->{INDIVIDUALS}, 'INDIVIDUAL')) {
+        # Name parts in document order; missing parts become empty strings.
+        my @names = map { $individual->{$_} // '' } qw(FIRST_NAME SECOND_NAME THIRD_NAME FOURTH_NAME NAME_ORIGINAL_SCRIPT);
+
+        for my $alias (_unsc_children($individual, 'INDIVIDUAL_ALIAS')) {
+            my $alias_name = $alias->{ALIAS_NAME};
+            next if ref $alias_name;    # not a plain-text name
+            push @names, $alias_name // '';
+        }
+
+        # Every listed date of birth is collected, not only the first one.
+        my @dob_list;
+        for my $dob (_unsc_children($individual, 'INDIVIDUAL_DATE_OF_BIRTH')) {
+            if (($dob->{TYPE_OF_DATE} // '') eq 'BETWEEN') {
+                push @dob_list, $dob->{FROM_YEAR} // '', $dob->{TO_YEAR} // '';
+            } elsif ($dob->{DATE}) {
+                push @dob_list, $dob->{DATE};
+            } elsif ($dob->{YEAR}) {
+                push @dob_list, $dob->{YEAR};
+            }
+        }
+
+        my @place_of_birth =
+            map { ($_->{CITY} // '', $_->{STATE_PROVINCE} // '', $_->{COUNTRY} // '') } _unsc_children($individual, 'INDIVIDUAL_PLACE_OF_BIRTH');
+
+        my @addresses   = _unsc_children($individual, 'INDIVIDUAL_ADDRESS');
+        my @residence   = map { $_->{COUNTRY}  // '' } @addresses;
+        my @postal_code = map { $_->{ZIP_CODE} // '' } @addresses;
+
+        # NATIONALITY/VALUE may arrive as a single value or as a list of values.
+        my $nationality = $individual->{NATIONALITY};
+        my $values      = ref($nationality) eq 'HASH' ? $nationality->{VALUE} : undef;
+        my @nationality = ref($values) eq 'ARRAY' ? @$values : ($values // '');
+
+        # Passport and national identification numbers. Both lists start empty:
+        # initialising them with [] would store a stray array-ref as first element.
+        my (@national_id, @passport_no);
+        for my $document (_unsc_children($individual, 'INDIVIDUAL_DOCUMENT')) {
+            my $type = $document->{TYPE_OF_DOCUMENT} // '';
+            if ($type eq 'Passport') {
+                push @passport_no, $document->{NUMBER} // '';
+            } elsif ($type eq 'National Identification Number') {
+                push @national_id, $document->{NUMBER} // '';
+            }
+        }
+
+        _process_sanction_entry(
+            $dataset,
+            names          => \@names,
+            date_of_birth  => \@dob_list,
+            place_of_birth => \@place_of_birth,
+            residence      => \@residence,
+            nationality    => \@nationality,
+            citizen        => \@nationality,    # no separate field for citizenship in the XML
+            postal_code    => \@postal_code,
+            national_id    => \@national_id,
+            passport_no    => \@passport_no,
+        );
+    }
+
+    return {
+        updated => $publish_epoch,
+        content => $dataset,
+    };
+}
+
+# Returns the hash-ref elements stored under $key of $node. Anything else
+# (a missing key, or an empty element that was not parsed into a hash) is skipped.
+sub _unsc_children {
+    my ($node, $key) = @_;
+
+    return () unless ref($node) eq 'HASH' && ref($node->{$key}) eq 'ARRAY';
+    return grep { ref($_) eq 'HASH' } @{$node->{$key}};
+}
+
=head2 run Fetches latest version of lists, and returns combined hash of successfully downloaded ones diff --git a/t/04_fetcher.t b/t/04_fetcher.t index 3174905c..b1ef64eb 100644 --- a/t/04_fetcher.t +++ b/t/04_fetcher.t @@ -20,6 +20,7 @@ my %args = ( ofac_sdn_url => "file://t/data/sample_ofac_sdn.zip", ofac_consolidated_url => "file://t/data/sample_ofac_consolidated.xml", hmt_url => "file://t/data/sample_hmt.csv", + unsc_url => "file://t/data/sample_unsc.xml", ); my $mocked_ua = Test::MockModule->new('Mojo::UserAgent'); @@ -55,10 +56,13 @@ subtest 'source url arguments' => sub { 'OFAC-SDN' => { error => ignore(), }, + 'UNSC-Sanctions' => { + error => ignore(), + }, }, 'All sources return errors - no content'; - is $calls, 3 * 4, 'the fetcher tried thrice per source and failed finally.'; + is $calls, 3 * 5, 'the fetcher tried thrice per source and failed finally.'; }; @@ -253,10 +257,63 @@ subtest 'OFAC Sanctions' => sub { } }; +subtest 'UNSC Sanctions' => sub { + my $data = Data::Validate::Sanctions::Fetcher::run(%args); + + my $source_name = 
'UNSC-Sanctions'; + ok $data->{$source_name}, 'Sanctions are loaded from the sample file'; + is $data->{$source_name}{updated}, 1729123202, "Sanctions update date matches the content of sample file"; + is scalar @{$data->{$source_name}{content}}, 7, "Number of names matches the content of the sample file"; + + + is_deeply find_entry_by_name($data->{$source_name}, 'MOHAMMAD NAIM'), + { + 'place_of_birth' => [ + 'af' + ], + 'citizen' => [ + 'af' + ], + 'nationality' => [ + 'af' + ], + 'postal_code' => [ + '63000' + ], + 'dob_year' => [ + '1975' + ], + 'names' => [ + 'MOHAMMAD NAIM', + 'BARICH', + 'KHUDAIDAD', + "\x{645}\x{62d}\x{645}\x{62f} \x{646}\x{639}\x{64a}\x{645} \x{628}\x{631}\x{64a}\x{62e} \x{62e}\x{62f}\x{627}\x{64a}\x{62f}\x{627}\x{62f}", + 'Mullah Naeem Barech', + 'Mullah Naeem Baraich', + 'Mullah Naimullah', + 'Mullah Naim Bareh', + 'Mohammad Naim', + 'Mullah Naim Barich', + 'Mullah Naim Barech', + 'Mullah Naim Barech Akhund', + 'Mullah Naeem Baric', + 'Naim Berich', + 'Haji Gul Mohammed Naim Barich', + 'Gul Mohammad', + 'Haji Ghul Mohammad', + 'Spen Zrae', + 'Gul Mohammad Kamran', + 'Mawlawi Gul Mohammad' + ] + }, + 'Alias names as saved in a single entry'; +}; + sub find_entry_by_name { my ($data, $name) = @_; my @result; + for my $entry ($data->{content}->@*) { push(@result, $entry) if List::Util::any { $_ eq $name } $entry->{names}->@*; } @@ -268,4 +325,4 @@ sub find_entry_by_name { return \@result; } -done_testing; +done_testing(); diff --git a/t/05_sanctions_redis.t b/t/05_sanctions_redis.t index 2261125f..324fd68d 100644 --- a/t/05_sanctions_redis.t +++ b/t/05_sanctions_redis.t @@ -72,6 +72,21 @@ my $sample_data = { passport_no => ['asdffdsa'], }] }, + 'UNSC-Sanctions' => { + updated => 91, + content => [{ + names => ['UBL'], + dob_epoch => [], + dob_year => [] + }, + { + names => ['USAMA BIN LADEN'], + dob_epoch => [], + dob_year => [] + }, + ], + error => '' + }, }; subtest 'Class 
constructor' => sub { @@ -106,6 +121,12 @@ subtest 'Class constructor' => sub { updated => 0, error => '' }, + 'UNSC-Sanctions' => { + content => [], + verified => 0, + updated => 0, + error => '' + }, }, 'There is no sanction data'; }; @@ -118,6 +139,10 @@ subtest 'Update Data' => sub { updated => 90, content => [] }, + 'UNSC-Sanctions' => { + updated => 90, + content => [] + }, }; $mock_fetcher->redefine(run => sub { return clone($mock_data) }); @@ -157,22 +182,30 @@ subtest 'Update Data' => sub { updated => 0, error => '' }, + 'UNSC-Sanctions' => { + content => [], + verified => 1500, + updated => 90, + }, }; is_deeply $validator->data, $expected, 'Data is correctly loaded'; - check_redis_content('EU-Sanctions', $mock_data->{'EU-Sanctions'}, 1500); - check_redis_content('HMT-Sanctions', {}, 1500); - check_redis_content('OFAC-Consolidated', {}, 1500); - check_redis_content('OFAC-SDN', {}, 1500); + check_redis_content('EU-Sanctions', $mock_data->{'EU-Sanctions'}, 1500); + check_redis_content('HMT-Sanctions', {}, 1500); + check_redis_content('OFAC-Consolidated', {}, 1500); + check_redis_content('OFAC-SDN', {}, 1500); + check_redis_content('UNSC-Sanctions', $mock_data->{'UNSC-Sanctions'}, 1500); is $index_call_counter, 1, 'index called after update'; $validator->update_data(); is $index_call_counter, 1, 'index not been called after update, due to unchanged data'; # rewrite to redis if update (publish) time is changed set_fixed_time(1600); - $mock_data->{'EU-Sanctions'}->{updated} = 91; + $mock_data->{'EU-Sanctions'}->{updated} = 91; + $mock_data->{'UNSC-Sanctions'}->{updated} = 91; $validator->update_data(); - $expected->{'EU-Sanctions'}->{updated} = 91; - $expected->{$_}->{verified} = 1600 for keys %$expected; + $expected->{'EU-Sanctions'}->{updated} = 91; + $expected->{'UNSC-Sanctions'}->{updated} = 91; + $expected->{$_}->{verified} = 1600 for keys %$expected; is_deeply $validator->data, $expected, 'Data is loaded with new update time'; 
check_redis_content('EU-Sanctions', $mock_data->{'EU-Sanctions'}, 1600, 'Redis content changed by increased update time'); is $index_call_counter, 2, 'index called after update'; @@ -193,8 +226,23 @@ subtest 'Update Data' => sub { }, ] }, + 'UNSC-Sanctions' => { + updated => 91, + content => [{ + names => ['UBL'], + dob_epoch => [], + dob_year => [] + }, + { + names => ['USAMA BIN LADEN'], + dob_epoch => [], + dob_year => [] + }, + ] + }, }; - $expected->{'EU-Sanctions'} = clone($mock_data->{'EU-Sanctions'}); + $expected->{'EU-Sanctions'} = clone($mock_data->{'EU-Sanctions'}); + $expected->{'UNSC-Sanctions'} = clone($mock_data->{'UNSC-Sanctions'}); set_fixed_time(1700); $validator->update_data(); $expected->{$_}->{verified} = 1700 for keys %$expected; @@ -215,7 +263,7 @@ subtest 'Update Data' => sub { set_fixed_time(1850); $validator = Data::Validate::Sanctions::Redis->new(connection => $redis); - is_deeply $validator->data->{'EU-Sanctions'}, $expected->{'EU-Sanctions'}, 'All fieds are correctly loaded form redis in constructor'; + is_deeply $validator->data->{'EU-Sanctions'}, $expected->{'EU-Sanctions'}, 'All fields are correctly loaded form redis in constructor'; # All sources are updated at the same time $mock_data = $sample_data; @@ -260,6 +308,12 @@ subtest 'load data' => sub { verified => 0, updated => 0, error => '' + }, + 'UNSC-Sanctions' => { + content => [], + verified => 0, + updated => 0, + error => '' }}; is_deeply $validator->data, $expected, 'Sanction lists are loaded with default values when redis is empty'; is $validator->last_updated, 0, 'Updated date is zero'; diff --git a/t/06_fetcher_sources.t b/t/06_fetcher_sources.t index 9f59b453..250d1352 100644 --- a/t/06_fetcher_sources.t +++ b/t/06_fetcher_sources.t @@ -15,7 +15,7 @@ subtest 'Fetch and process all sources from default urls' => sub { hmt_url => "file://t/data/sample_hmt.csv", ); - is_deeply [sort keys %$data], [qw(EU-Sanctions HMT-Sanctions OFAC-Consolidated OFAC-SDN )], 'sanction source 
list is correct'; + is_deeply [sort keys %$data], [qw(EU-Sanctions HMT-Sanctions OFAC-Consolidated OFAC-SDN UNSC-Sanctions)], 'sanction source list is correct'; cmp_ok($data->{'EU-Sanctions'}{updated}, '>=', 1541376000, "Fetcher::run HMT-Sanctions sanctions.yml"); @@ -25,6 +25,8 @@ subtest 'Fetch and process all sources from default urls' => sub { cmp_ok($data->{'OFAC-Consolidated'}{updated}, '>=', 1541376000, "Fetcher::run OFAC-Consolidated sanctions.yml"); + cmp_ok($data->{'UNSC-Sanctions'}{updated}, '>=', 1541376000, "Fetcher::run HMT-Sanctions sanctions.yml"); + cmp_ok(scalar $data->{'HMT-Sanctions'}{'content'}->@*, '==', 23, "HMT-Sanctions namelist - sample file"); }; diff --git a/t/data/sample_unsc.xml b/t/data/sample_unsc.xml new file mode 100644 index 00000000..742b59e8 --- /dev/null +++ b/t/data/sample_unsc.xml @@ -0,0 +1,471 @@ + + + + 2797952 + 1 + JABER + ABDALLAH + JABER + AHMAD AL-JALAHMAH + Al-Qaida + QDi.237 + 2014-01-03 + جابر عبد الله جابر أحمد الجلاهمة + Previously listed between 16 Jan. 2008 and 3 Jan. 2014 (amended on 1 Jul. 2008, 23 Jul. 2008, 25 Jan. 2010). Review pursuant to Security Council resolution 1822 (2008) was concluded on 14 Sep. 2009. INTERPOL-UN Security Council Special Notice web link:https://www.interpol.int/en/How-we-work/Notices/View-UN-Notices-Individuals + + Kuwait + + + UN List + + + + + + Good + Jaber Al-Jalamah + + + Good + Abu Muhammad Al-Jalahmah + + + Good + Jabir Abdallah Jabir Ahmad Jalahmah + + + Good + Jabir 'Abdallah Jabir Ahmad Al-Jalamah + + + Good + Jabir Al-Jalhami + + + Low + Abdul-Ghani + + + Low + Abu Muhammad + + + Kuwait + residence as at March 2009 and at December 2013 + + + EXACT + 1959-09-24 + + + Al-Khitan area + Kuwait + + + Passport + 101423404 + + + Passport + 2541451 + Kuwait + valid until 16 Feb. 
2017 + + + Passport + 002327881 + Kuwait + + + National Identification Number + 259092401188 + Kuwait + + + + + + 110897 + 1 + MOHAMMAD NAIM + BARICH + KHUDAIDAD + Taliban + TAi.013 + 2001-02-23 + Male + محمد نعيم بريخ خدايداد + Member of the Taliban Military Commission as at mid-2013. Believed to be in Afghanistan/Pakistan border area. Belongs to Barich tribe. Review pursuant to Security Council resolution 1822 (2008) was concluded on 1 Jun. 2010. INTERPOL-UN Security Council Special Notice web link:https://www.interpol.int/en/How-we-work/Notices/View-UN-Notices-Individuals + + <VALUE>Mullah</VALUE> + + + Deputy Minister of Civil Aviation under the Taliban regime + + + Afghanistan + + + UN List + + + 2003-09-03 + 2007-07-09 + 2007-09-21 + 2012-02-13 + 2012-08-13 + 2013-04-22 + 2013-12-31 + + + Good + Mullah Naeem Barech + + + Good + Mullah Naeem Baraich + + + Good + Mullah Naimullah + + + Good + Mullah Naim Bareh + + + Good + Mohammad Naim + previously listed as + + + Good + Mullah Naim Barich + + + Good + Mullah Naim Barech + + + Good + Mullah Naim Barech Akhund + + + Good + Mullah Naeem Baric + + + Good + Naim Berich + + + Good + Haji Gul Mohammed Naim Barich + + + Good + Gul Mohammad + + + Good + Haji Ghul Mohammad + + + Low + Spen Zrae + + + Good + Gul Mohammad Kamran + + + Good + Mawlawi Gul Mohammad + + + + + + + + + + + + + + + + + + + + 63000 + + + APPROXIMATELY + 1975 + + + Lakhi village, Hazarjuft Area, Garmsir District + Helmand Province + Afghanistan + + + + + + + 110899 + 1 + FAZL MOHAMMAD + MAZLOOM + Taliban + TAi.023 + 2001-02-23 + فضل محمد مظلوم + Review pursuant to Security Council resolution 1822 (2008) was concluded on 23 Jul. 2010. 
INTERPOL-UN Security Council Special Notice web link:https://www.interpol.int/en/How-we-work/Notices/View-UN-Notices-Individuals + + <VALUE>Mullah</VALUE> + + + Deputy Chief of Army Staff of the Taliban regime + + + Afghanistan + + + UN List + + + 2003-09-03 + 2007-09-21 + 2008-10-03 + 2013-12-31 + 2016-09-07 + + + Good + Molah Fazl + + + Good + Fazel Mohammad Mazloom + + + Qatar + + + BETWEEN + 1963 + 1968 + + + Uruzgan + Afghanistan + + + + + + + 110900 + 1 + SAID AHMED + SHAHIDKHEL + Taliban + TAi.028 + 2001-02-23 + سيد أحمد شهيد خيل + In July 2003 he was in custody in Kabul, Afghanistan. Released from custody in 2007. Believed to be in Afghanistan/Pakistan border area. Member of the Taliban leadership council as of mid-2013. Belongs to Andar tribe. Review pursuant to Security Council resolution 1822 (2008) was concluded on 23 Jul. 2010. INTERPOL-UN Security Council Special Notice web link:https://www.interpol.int/en/How-we-work/Notices/View-UN-Notices-Individuals + + <VALUE>Maulavi</VALUE> + + + Deputy Minister of Education under the Taliban regime + + + Afghanistan + + + UN List + + + 2003-09-03 + 2005-12-20 + 2007-09-21 + 2011-11-29 + 2013-12-31 + + + + + + + + + + APPROXIMATELY + 1975 + + + Spandeh (Espandi ‘Olya) village, Andar District + Ghazni Province + Afghanistan + + + + + + + 110901 + 1 + MOHAMMAD + AHMADI + Taliban + TAi.031 + 2001-02-23 + محمد احمدی + Believed to be in Afghanistan/Pakistan border area. Belongs to Kakar tribe. He is a member of the Taliban Supreme Council. Review pursuant to Security Council resolution 1822 (2008) was concluded on 23 Jul. 2010. 
INTERPOL-UN Security Council Special Notice web link:https://www.interpol.int/en/How-we-work/Notices/View-UN-Notices-Individuals + + <VALUE>Mullah</VALUE> + <VALUE>Haji</VALUE> + + + President of Central Bank (Da Afghanistan Bank) under the Taliban regime + Minister of Finance under the Taliban regime + + + Afghanistan + + + UN List + + + 2003-09-03 + 2007-09-21 + 2011-11-29 + 2013-12-31 + + + + + + + + + + + Kushal Khan Mena, District Number 5 + Kabul + Afghanistan + + + APPROXIMATELY + 1963 + + + Daman District + Kandahar Province + Afghanistan + + + + + + + 110912 + 1 + ABDUL RAHMAN + ZAHED + Taliban + TAi.033 + 2001-01-25 + عبدالرحمان زاهد + Believed to be in Afghanistan/Pakistan border area. Review pursuant to Security Council resolution 1822 (2008) was concluded on 21 Jul. 2010. INTERPOL-UN Security Council Special Notice web link:https://www.interpol.int/en/How-we-work/Notices/View-UN-Notices-Individuals + + <VALUE>Mullah</VALUE> + + + Deputy Minister of Foreign Affairs under the Taliban regime + + + Afghanistan + + + UN List + + + 2003-09-03 + 2007-07-18 + 2007-09-21 + 2011-11-29 + + + Good + Abdul Rehman Zahid + + + + + + APPROXIMATELY + 1963 + + + Kharwar District + Logar Province + Afghanistan + + + + + + + 110913 + 1 + FAIZ + Taliban + TAi.036 + 2001-02-23 + فيض + Review pursuant to Security Council resolution 1822 (2008) was concluded on 23 Jul. 2010. 
INTERPOL-UN Security Council Special Notice web link:https://www.interpol.int/en/How-we-work/Notices/View-UN-Notices-Individuals + + <VALUE>Maulavi</VALUE> + + + Head of the Information Department, Ministry of Foreign Affairs under the Taliban regime + + + Afghanistan + + + UN List + + + 2003-09-03 + 2007-07-09 + 2007-09-21 + 2011-11-29 + + + + + + + + + + APPROXIMATELY + 1969 + + + Ghazni Province + Afghanistan + + + + + + + \ No newline at end of file diff --git a/xt/20_update.t b/xt/20_update.t index 95139fa6..c1be23c3 100644 --- a/xt/20_update.t +++ b/xt/20_update.t @@ -6,7 +6,7 @@ use File::Temp qw(tempfile); use FindBin qw($Bin); use File::stat; use Path::Tiny; -use YAML::XS qw(Dump); +use YAML::XS qw(Dump); use Test::MockTime qw(set_fixed_time); my $sanction_file;