Skip to content

Commit

Permalink
Sanctions/unsc sanctions xml/comp 1415 (#62)
Browse files Browse the repository at this point in the history
* added unsc sanction xml

* added test

* removed unused lines

* fixed test and removed dumper

* fixed parsing mechanism

* fixed parser and test

* fixed parsing and test

* sanctions/unsc-sanctions-xml/comp-1415

* sanctions/unsc-sanctions-xml/comp-1415

* removed unused items

* fixed xml link

* [DT]

* fixing redis test

* fixing redis test again

* fixed compile error

* fixed another redis test

* fixed redis test expectation

* fixed another redis expectation

* fixed another redis expectation

* fixed more redis

* redis test manipulation

* added unsc to sample data

* removed unnecessary errors

* fixed more test

* fixed test

* fixed tests some more

* expected fix

* fixed expected

* removed error

* fix

* fixed error

* fixed update expectation

* WIP

* fix

* fix

* added removed line

* made tidy

* changed parses to xml2hash

* removed xml simple import

* removed xml simple from makefile

* fixed birth date and alias issue
  • Loading branch information
saad-deriv authored Oct 29, 2024
1 parent c7cd73d commit 7eaef76
Show file tree
Hide file tree
Showing 6 changed files with 706 additions and 14 deletions.
104 changes: 103 additions & 1 deletion lib/Data/Validate/Sanctions/Fetcher.pm
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ use Syntax::Keyword::Try;
use XML::Fast;
use Locale::Country;


use constant MAX_REDIRECTS => 3;
# VERSION

Expand Down Expand Up @@ -83,6 +82,11 @@ sub config {
url => $eu_url,
parser => \&_eu_xml,
},
'UNSC-Sanctions' => {
description => 'UN: United Nations Security Council Consolidated List',
url => $args{unsc_url} || 'https://scsanctions.un.org/resources/xml/en/consolidated.xml',
parser => \&_unsc_xml,
},
};
}

Expand Down Expand Up @@ -425,6 +429,104 @@ sub _eu_xml {
};
}

sub _unsc_xml {
my ($xml_content) = @_;

# Preprocess the XML content to escape unescaped ampersands
$xml_content =~ s/&(?!(?:amp|lt|gt|quot|apos);)/&/g;
my $data = xml2hash($xml_content,
array =>
['INDIVIDUAL', 'INDIVIDUAL_ALIAS', 'INDIVIDUAL_ADDRESS', 'INDIVIDUAL_DATE_OF_BIRTH', 'INDIVIDUAL_PLACE_OF_BIRTH', 'INDIVIDUAL_DOCUMENT'])
->{CONSOLIDATED_LIST};

# Extract the dateGenerated attribute from the first line of the XML content
my ($date_generated) = $data->{'-dateGenerated'};
die "Corrupt data. Release date is missing\n" unless $date_generated;

# Convert the dateGenerated to epoch milliseconds
my $publish_epoch = _date_to_epoch($date_generated // '');

my $dataset = [];

for my $individual (@{$data->{'INDIVIDUALS'}->{'INDIVIDUAL'}}) {
my %entry;

$entry{first_name} = $individual->{'FIRST_NAME'};
$entry{second_name} = $individual->{'SECOND_NAME'};
$entry{third_name} = $individual->{'THIRD_NAME'} // '';
$entry{fourth_name} = $individual->{'FOURTH_NAME'} // '';
$entry{name_original_script} = $individual->{'NAME_ORIGINAL_SCRIPT'} // '';

my @names = (
$entry{first_name} // '',
$entry{second_name} // '',
$entry{third_name} // '',
$entry{fourth_name} // '',
$entry{name_original_script} // '',
);

foreach my $alias (@{$individual->{INDIVIDUAL_ALIAS}}) {
if (ref($alias->{'ALIAS_NAME'}) ne 'HASH' || %{$alias->{'ALIAS_NAME'}}) {
push @names, $alias->{'ALIAS_NAME'} // '';
}
}
my @dob_list;

if ($individual->{'INDIVIDUAL_DATE_OF_BIRTH'}[0]{'TYPE_OF_DATE'} && $individual->{'INDIVIDUAL_DATE_OF_BIRTH'}[0]{'TYPE_OF_DATE'} eq 'BETWEEN') {
push @dob_list, $individual->{'INDIVIDUAL_DATE_OF_BIRTH'}[0]{'FROM_YEAR'} // '';
push @dob_list, $individual->{'INDIVIDUAL_DATE_OF_BIRTH'}[0]{'TO_YEAR'} // '';
} else {
if ($individual->{'INDIVIDUAL_DATE_OF_BIRTH'}[0]{'DATE'}) {
@dob_list = $individual->{'INDIVIDUAL_DATE_OF_BIRTH'}[0]{'DATE'};
} elsif ($individual->{'INDIVIDUAL_DATE_OF_BIRTH'}[0]{'YEAR'}) {
@dob_list = $individual->{'INDIVIDUAL_DATE_OF_BIRTH'}[0]{'YEAR'};
}
}

my @place_of_birth = (
$individual->{'INDIVIDUAL_PLACE_OF_BIRTH'}[0]{'CITY'} // '',
$individual->{'INDIVIDUAL_PLACE_OF_BIRTH'}[0]{'STATE_PROVINCE'} // '',
$individual->{'INDIVIDUAL_PLACE_OF_BIRTH'}[0]{'COUNTRY'} // ''
);

my @residence = (map { $_->{'COUNTRY'} // '' } @{$individual->{'INDIVIDUAL_ADDRESS'}});
my @postal_code = (map { $_->{'ZIP_CODE'} // '' } @{$individual->{'INDIVIDUAL_ADDRESS'}});

my @nationality = ($individual->{'NATIONALITY'}->{'VALUE'} // '');
my @national_id = [];
my @passport_no = [];

# Extract passport and national identification numbers
foreach my $document (@{$individual->{INDIVIDUAL_DOCUMENT}}) {
if ($document ne "") {
if ($document->{'TYPE_OF_DOCUMENT'} eq 'Passport') {
push @passport_no, $document->{'NUMBER'} // '';
} elsif ($document->{'TYPE_OF_DOCUMENT'} eq 'National Identification Number') {
push @national_id, $document->{'NUMBER'} // '';
}
}
}

_process_sanction_entry(
$dataset,
names => \@names,
date_of_birth => \@dob_list,
place_of_birth => \@place_of_birth,
residence => \@residence,
nationality => \@nationality,
citizen => \@nationality, # no seprate field for citizenship in the XML
postal_code => \@postal_code,
national_id => \@national_id,
passport_no => \@passport_no,
);
}

return {
updated => $publish_epoch,
content => $dataset,
};
}

=head2 run
Fetches latest version of lists, and returns combined hash of successfully downloaded ones
Expand Down
67 changes: 65 additions & 2 deletions t/04_fetcher.t
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ my %args = (
ofac_sdn_url => "file://t/data/sample_ofac_sdn.zip",
ofac_consolidated_url => "file://t/data/sample_ofac_consolidated.xml",
hmt_url => "file://t/data/sample_hmt.csv",
unsc_url => "file://t/data/sample_unsc.xml",
);

my $mocked_ua = Test::MockModule->new('Mojo::UserAgent');
Expand Down Expand Up @@ -55,10 +56,13 @@ subtest 'source url arguments' => sub {
'OFAC-SDN' => {
error => ignore(),
},
'UNSC-Sanctions' => {
error => ignore(),
},
},
'All sources return errors - no content';

is $calls, 3 * 4, 'the fetcher tried thrice per source and failed finally.';
is $calls, 3 * 5, 'the fetcher tried thrice per source and failed finally.';

};

Expand Down Expand Up @@ -253,10 +257,69 @@ subtest 'OFAC Sanctions' => sub {
}
};

subtest 'UNSC Sanctions' => sub {
my $data = Data::Validate::Sanctions::Fetcher::run(%args);

my $source_name = 'UNSC-Sanctions';
ok $data->{$source_name}, 'Sanctions are loaded from the sample file';
is $data->{$source_name}{updated}, 1729123202, "Sanctions update date matches the content of sample file";
is scalar @{$data->{$source_name}{content}}, 7, "Number of names matches the content of the sample file";


is_deeply find_entry_by_name($data->{$source_name}, 'MOHAMMAD NAIM'),
{
'national_id' => [
[]
],
'place_of_birth' => [
'af'
],
'citizen' => [
'af'
],
'nationality' => [
'af'
],
'postal_code' => [
'63000'
],
'passport_no' => [
[]
],
'dob_year' => [
'1975'
],
'names' => [
'MOHAMMAD NAIM',
'BARICH',
'KHUDAIDAD',
"\x{645}\x{62d}\x{645}\x{62f} \x{646}\x{639}\x{64a}\x{645} \x{628}\x{631}\x{64a}\x{62e} \x{62e}\x{62f}\x{627}\x{64a}\x{62f}\x{627}\x{62f}",
'Mullah Naeem Barech',
'Mullah Naeem Baraich',
'Mullah Naimullah',
'Mullah Naim Bareh',
'Mohammad Naim',
'Mullah Naim Barich',
'Mullah Naim Barech',
'Mullah Naim Barech Akhund',
'Mullah Naeem Baric',
'Naim Berich',
'Haji Gul Mohammed Naim Barich',
'Gul Mohammad',
'Haji Ghul Mohammad',
'Spen Zrae',
'Gul Mohammad Kamran',
'Mawlawi Gul Mohammad'
]
},
'Alias names as saved in a single entry';
};

sub find_entry_by_name {
my ($data, $name) = @_;

my @result;

for my $entry ($data->{content}->@*) {
push(@result, $entry) if List::Util::any { $_ eq $name } $entry->{names}->@*;
}
Expand All @@ -268,4 +331,4 @@ sub find_entry_by_name {
return \@result;
}

done_testing;
done_testing();
72 changes: 63 additions & 9 deletions t/05_sanctions_redis.t
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,21 @@ my $sample_data = {
passport_no => ['asdffdsa'],
}]
},
'UNSC-Sanctions' => {
updated => 91,
content => [{
names => ['UBL'],
dob_epoch => [],
dob_year => []
},
{
names => ['USAMA BIN LADEN'],
dob_epoch => [],
dob_year => []
},
],
error => ''
},
};

subtest 'Class constructor' => sub {
Expand Down Expand Up @@ -106,6 +121,12 @@ subtest 'Class constructor' => sub {
updated => 0,
error => ''
},
'UNSC-Sanctions' => {
content => [],
verified => 0,
updated => 0,
error => ''
},
},
'There is no sanction data';
};
Expand All @@ -118,6 +139,10 @@ subtest 'Update Data' => sub {
updated => 90,
content => []
},
'UNSC-Sanctions' => {
updated => 90,
content => []
},
};
$mock_fetcher->redefine(run => sub { return clone($mock_data) });

Expand Down Expand Up @@ -157,22 +182,30 @@ subtest 'Update Data' => sub {
updated => 0,
error => ''
},
'UNSC-Sanctions' => {
content => [],
verified => 1500,
updated => 90,
},
};
is_deeply $validator->data, $expected, 'Data is correctly loaded';
check_redis_content('EU-Sanctions', $mock_data->{'EU-Sanctions'}, 1500);
check_redis_content('HMT-Sanctions', {}, 1500);
check_redis_content('OFAC-Consolidated', {}, 1500);
check_redis_content('OFAC-SDN', {}, 1500);
check_redis_content('EU-Sanctions', $mock_data->{'EU-Sanctions'}, 1500);
check_redis_content('HMT-Sanctions', {}, 1500);
check_redis_content('OFAC-Consolidated', {}, 1500);
check_redis_content('OFAC-SDN', {}, 1500);
check_redis_content('UNSC-Sanctions', $mock_data->{'UNSC-Sanctions'}, 1500);
is $index_call_counter, 1, 'index called after update';
$validator->update_data();
is $index_call_counter, 1, 'index not been called after update, due to unchanged data';

# rewrite to redis if update (publish) time is changed
set_fixed_time(1600);
$mock_data->{'EU-Sanctions'}->{updated} = 91;
$mock_data->{'EU-Sanctions'}->{updated} = 91;
$mock_data->{'UNSC-Sanctions'}->{updated} = 91;
$validator->update_data();
$expected->{'EU-Sanctions'}->{updated} = 91;
$expected->{$_}->{verified} = 1600 for keys %$expected;
$expected->{'EU-Sanctions'}->{updated} = 91;
$expected->{'UNSC-Sanctions'}->{updated} = 91;
$expected->{$_}->{verified} = 1600 for keys %$expected;
is_deeply $validator->data, $expected, 'Data is loaded with new update time';
check_redis_content('EU-Sanctions', $mock_data->{'EU-Sanctions'}, 1600, 'Redis content changed by increased update time');
is $index_call_counter, 2, 'index called after update';
Expand All @@ -193,8 +226,23 @@ subtest 'Update Data' => sub {
},
]
},
'UNSC-Sanctions' => {
updated => 91,
content => [{
names => ['UBL'],
dob_epoch => [],
dob_year => []
},
{
names => ['USAMA BIN LADEN'],
dob_epoch => [],
dob_year => []
},
]
},
};
$expected->{'EU-Sanctions'} = clone($mock_data->{'EU-Sanctions'});
$expected->{'EU-Sanctions'} = clone($mock_data->{'EU-Sanctions'});
$expected->{'UNSC-Sanctions'} = clone($mock_data->{'UNSC-Sanctions'});
set_fixed_time(1700);
$validator->update_data();
$expected->{$_}->{verified} = 1700 for keys %$expected;
Expand All @@ -215,7 +263,7 @@ subtest 'Update Data' => sub {

set_fixed_time(1850);
$validator = Data::Validate::Sanctions::Redis->new(connection => $redis);
is_deeply $validator->data->{'EU-Sanctions'}, $expected->{'EU-Sanctions'}, 'All fieds are correctly loaded form redis in constructor';
is_deeply $validator->data->{'EU-Sanctions'}, $expected->{'EU-Sanctions'}, 'All fields are correctly loaded form redis in constructor';

# All sources are updated at the same time
$mock_data = $sample_data;
Expand Down Expand Up @@ -260,6 +308,12 @@ subtest 'load data' => sub {
verified => 0,
updated => 0,
error => ''
},
'UNSC-Sanctions' => {
content => [],
verified => 0,
updated => 0,
error => ''
}};
is_deeply $validator->data, $expected, 'Sanction lists are loaded with default values when redis is empty';
is $validator->last_updated, 0, 'Updated date is zero';
Expand Down
4 changes: 3 additions & 1 deletion t/06_fetcher_sources.t
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ subtest 'Fetch and process all sources from default urls' => sub {
hmt_url => "file://t/data/sample_hmt.csv",
);

is_deeply [sort keys %$data], [qw(EU-Sanctions HMT-Sanctions OFAC-Consolidated OFAC-SDN )], 'sanction source list is correct';
is_deeply [sort keys %$data], [qw(EU-Sanctions HMT-Sanctions OFAC-Consolidated OFAC-SDN UNSC-Sanctions)], 'sanction source list is correct';

cmp_ok($data->{'EU-Sanctions'}{updated}, '>=', 1541376000, "Fetcher::run HMT-Sanctions sanctions.yml");

Expand All @@ -25,6 +25,8 @@ subtest 'Fetch and process all sources from default urls' => sub {

cmp_ok($data->{'OFAC-Consolidated'}{updated}, '>=', 1541376000, "Fetcher::run OFAC-Consolidated sanctions.yml");

cmp_ok($data->{'UNSC-Sanctions'}{updated}, '>=', 1541376000, "Fetcher::run HMT-Sanctions sanctions.yml");

cmp_ok(scalar $data->{'HMT-Sanctions'}{'content'}->@*, '==', 23, "HMT-Sanctions namelist - sample file");
};

Expand Down
Loading

0 comments on commit 7eaef76

Please sign in to comment.