Skip to content

Commit

Permalink
Merge pull request #1323 from metacpan/haarg/contrib-faster-more-resilient
Browse files Browse the repository at this point in the history

make contributor script faster and more resilient
  • Loading branch information
haarg authored Nov 22, 2024
2 parents 232b751 + bea9c1f commit c2b0593
Showing 1 changed file with 53 additions and 19 deletions.
72 changes: 53 additions & 19 deletions lib/MetaCPAN/Script/Role/Contributor.pm
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,28 @@ sub update_contributors {
},
);

my $bulk = $self->es->bulk_helper( es_doc_path('contributor') );
my $report = sub {
my ( $action, $result, $i ) = @_;
if ( $i == 0 ) {
log_info {'flushing contributor updates'};
}
};

my $bulk = $self->es->bulk_helper(
es_doc_path('contributor'),
on_success => $report,
on_error => $report,
);

log_info { 'updating contributors for ' . $scroll->total . ' releases' };

while ( my $release = $scroll->next ) {
my $source = $release->{_source};
my $name = $source->{name};
if ( !( $name && $source->{author} && $source->{distribution} ) ) {
Dlog_warn {"found broken release: $_"} $release;
next;
}
log_debug { 'updating contributors for ' . $release->{_source}{name} };
my $actions = $self->release_contributor_update_actions(
$release->{_source} );
Expand Down Expand Up @@ -78,6 +97,11 @@ sub release_contributor_update_actions {
return \@actions;
}

# Cache of author email address => PAUSE id, starting out as an empty
# hashref. get_contributors stores the addresses it resolves from the
# author search here and skips re-querying any address already present.
has email_mapping => (
    is      => 'ro',
    default => sub { return +{} },
);

sub get_contributors {
my ( $self, $release ) = @_;

Expand Down Expand Up @@ -164,24 +188,34 @@ sub get_contributors {
}

if (%want_email) {
my $check_author = $self->es->search(
es_doc_path('author'),
body => {
query => { terms => { email => [ sort keys %want_email ] } },
_source => [ 'email', 'pauseid' ],
size => 100,
},
);

for my $author ( @{ $check_author->{hits}{hits} } ) {
my $emails = $author->{_source}{email};
$emails = [$emails]
if !ref $emails;
my $pauseid = uc $author->{_source}{pauseid};
for my $email (@$emails) {
for my $contrib ( @{ $want_email{$email} } ) {
$contrib->{pauseid} = $pauseid;
}
my $email_mapping = $self->email_mapping;

my @fetch_email = grep !exists $email_mapping->{$_},
sort keys %want_email;

if (@fetch_email) {
my $check_author = $self->es->search(
es_doc_path('author'),
body => {
query => { terms => { email => \@fetch_email } },
_source => [ 'email', 'pauseid' ],
size => 100,
},
);

for my $author ( @{ $check_author->{hits}{hits} } ) {
my $pauseid = uc $author->{_source}{pauseid};
my $emails = $author->{_source}{email};
$email_mapping->{$_} //= $pauseid
for ref $emails ? @$emails : $emails;
}
}

for my $email ( keys %want_email ) {
my $pauseid = $email_mapping->{$email}
or next;
for my $contrib ( @{ $want_email{$email} } ) {
$contrib->{pauseid} = $pauseid;
}
}
}
Expand Down

0 comments on commit c2b0593

Please sign in to comment.