diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 30a0994..5d3372d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -17,7 +17,7 @@ jobs: strategy: matrix: os: ['ubuntu-latest'] - perl: [ '5.36', '5.34', '5.32', '5.30', '5.28', '5.26', '5.24', '5.22', '5.20', '5.18', '5.16', '5.14' ] + perl: [ '5.38', '5.36', '5.34', '5.32', '5.30', '5.28', '5.26', '5.24', '5.22', '5.20', '5.18', '5.16' ] name: Perl ${{ matrix.perl }} on ${{ matrix.os }} steps: - uses: actions/checkout@v3 diff --git a/CopyIndexes.mkdn b/CopyIndexes.mkdn index 0bc11bf..2f5c62a 100644 --- a/CopyIndexes.mkdn +++ b/CopyIndexes.mkdn @@ -4,7 +4,7 @@ es-copy-index.pl - Copy an index from one cluster to another # VERSION -version 8.5 +version 8.6 # SYNOPSIS diff --git a/Maintenance.mkdn b/Maintenance.mkdn index 8ac5dee..159eebb 100644 --- a/Maintenance.mkdn +++ b/Maintenance.mkdn @@ -4,7 +4,7 @@ es-daily-index-maintenance.pl - Run to prune old indexes and optimize existing # VERSION -version 8.5 +version 8.6 # SYNOPSIS diff --git a/README.mkdn b/README.mkdn index 3dc3539..4f75bde 100644 --- a/README.mkdn +++ b/README.mkdn @@ -4,7 +4,7 @@ App::ElasticSearch::Utilities - Utilities for Monitoring ElasticSearch # VERSION -version 8.5 +version 8.6 # SYNOPSIS @@ -217,6 +217,21 @@ Returns a hashref Performs flattening that's compatible with Elasticsearch's flattening. +## es\_human\_count + +Takes a number and returns the number as a string in docs, thousands, millions, or billions. + + 1_000 -> "1.00 thousand", + 1_000_000 -> "1.00 million", + +## es\_human\_size + +Takes a number and returns the number as a string in bytes, Kb, Mb, Gb, or Tb using base 1024. + + 1024 -> '1.00 Kb', + 1048576 -> '1.00 Mb', + 1073741824 -> '1.00 Gb', + ## def('key') Exposes Definitions grabbed by options parsing diff --git a/Searching.mkdn b/Searching.mkdn index eddb8c0..cb8478f 100644 --- a/Searching.mkdn +++ b/Searching.mkdn @@ -4,7 +4,7 @@ es-search.pl - Provides a CLI for quick searches of data in ElasticSearch daily # VERSION -version 8.5 +version 8.6 # SYNOPSIS diff --git a/cpanfile b/cpanfile index ef894b3..13ab686 100644 --- a/cpanfile +++ b/cpanfile @@ -42,7 +42,7 @@ requires "YAML::XS" => "0"; requires "feature" => "0"; requires "namespace::autoclean" => "0"; requires "parent" => "0"; -requires "perl" => "5.013002"; +requires "perl" => "v5.16.0"; requires "strict" => "0"; requires "version" => "0"; requires "warnings" => "0"; @@ -54,12 +54,12 @@ on 'test' => sub { requires "IO::Handle" => "0"; requires "IPC::Open3" => "0"; requires "Test::More" => "0"; - requires "perl" => "5.013002"; + requires "perl" => "v5.16.0"; }; on 'configure' => sub { requires "ExtUtils::MakeMaker" => "0"; - requires "perl" => "5.013002"; + requires "perl" => "v5.16.0"; }; on 'develop' => sub { diff --git a/lib/App/ElasticSearch/Utilities.pm b/lib/App/ElasticSearch/Utilities.pm index 640388e..7ff0a4e 100644 --- a/lib/App/ElasticSearch/Utilities.pm +++ b/lib/App/ElasticSearch/Utilities.pm @@ -1,8 +1,7 @@ # ABSTRACT: Utilities for Monitoring ElasticSearch package App::ElasticSearch::Utilities; -use v5.10; -use strict; +use v5.16; use warnings; # VERSION @@ -62,10 +61,13 @@ use Sub::Exporter -setup => { es_apply_index_settings es_local_index_meta es_flatten_hash + es_human_count + es_human_size )], groups => { config => [qw(es_utils_initialize es_globals)], default => [qw(es_utils_initialize es_connect es_indices es_request)], + human => [qw(es_human_count es_human_size)], indices => [qw(:default es_indices_meta)], index => [qw(:default es_index_valid es_index_fields es_index_days_old es_index_bases)], }, @@ -1078,7 +1080,12 @@ sub es_index_strip_date { es_utils_initialize() unless keys %DEF; - if( $index =~ s/[-_]$PATTERN_REGEX{DATE}.*// ) { + # Try the Date Pattern + if( $index =~ s/[-_]$PATTERN_REGEX{DATE}.*//o ) { + return $index; + } + # Fallback to matching thing-YYYY-MM-DD or thing-YYYY.MM.DD + elsif( $index =~ s/[-_]\d{4}([.-])\d{2}\g{1}\d{2}(?:[-_.]\d+)?$// ) { return $index; } return; @@ -1465,6 +1472,53 @@ sub es_flatten_hash { return \%compat; } +=func es_human_count + +Takes a number and returns the number as a string in docs, thousands, millions, or billions. + + 1_000 -> "1.00 thousand", + 1_000_000 -> "1.00 million", + +=cut + +sub es_human_count { + my ($size) = @_; + + my $unit = 'docs'; + my @units = qw(thousand million billion); + + while( $size > 1000 && @units ) { + $size /= 1000; + $unit = shift @units; + } + + return sprintf "%0.2f %s", $size, $unit; +} + +=func es_human_size + +Takes a number and returns the number as a string in bytes, Kb, Mb, Gb, or Tb using base 1024. + + 1024 -> '1.00 Kb', + 1048576 -> '1.00 Mb', + 1073741824 -> '1.00 Gb', + +=cut + +sub es_human_size { + my ($size) = @_; + + my $unit = 'b'; + my @units = qw(Kb Mb Gb Tb); + + while( $size > 1024 && @units ) { + $size /= 1024; + $unit = shift @units; + } + + return sprintf "%0.2f %s", $size, $unit; +} + =func def('key') Exposes Definitions grabbed by options parsing diff --git a/lib/App/ElasticSearch/Utilities/Aggregations.pm b/lib/App/ElasticSearch/Utilities/Aggregations.pm index a673d13..f8fdacc 100644 --- a/lib/App/ElasticSearch/Utilities/Aggregations.pm +++ b/lib/App/ElasticSearch/Utilities/Aggregations.pm @@ -1,7 +1,7 @@ package App::ElasticSearch::Utilities::Aggregations; # ABSTRACT: Code to simplify creating and working with Elasticsearch aggregations -use strict; +use v5.16; use warnings; use Storable qw(dclone); @@ -539,6 +539,7 @@ sub es_flatten_aggregations { my $k = delete $result->{key}; my $ks = delete $result->{key_as_string}; push @{ $row }, $field, $ks || $k; + push @{ $row }, "$field.raw", $k if $ks; push @{ $row }, "$field.hits", delete $result->{doc_count} || 0; } my %buckets = (); diff --git a/lib/App/ElasticSearch/Utilities/Connection.pm b/lib/App/ElasticSearch/Utilities/Connection.pm index d6724d6..f9500b8 100644 --- a/lib/App/ElasticSearch/Utilities/Connection.pm +++ b/lib/App/ElasticSearch/Utilities/Connection.pm @@ -29,7 +29,7 @@ the API you'd expect from B. =cut -use strict; +use v5.16; use warnings; # VERSION diff --git a/lib/App/ElasticSearch/Utilities/HTTPRequest.pm b/lib/App/ElasticSearch/Utilities/HTTPRequest.pm index 172103d..6b08b14 100644 --- a/lib/App/ElasticSearch/Utilities/HTTPRequest.pm +++ b/lib/App/ElasticSearch/Utilities/HTTPRequest.pm @@ -9,7 +9,7 @@ use this module in your code. =cut -use strict; +use v5.16; use warnings; no warnings 'uninitialized'; diff --git a/lib/App/ElasticSearch/Utilities/Metrics.pm b/lib/App/ElasticSearch/Utilities/Metrics.pm new file mode 100644 index 0000000..6f8b53d --- /dev/null +++ b/lib/App/ElasticSearch/Utilities/Metrics.pm @@ -0,0 +1,360 @@ +package App::ElasticSearch::Utilities::Metrics; +# ABSTRACT: Fetches performance metrics about the node + +=head1 SYNOPSIS + +This provides a simple API to export some core metrics from the local +ElasticSearch instance. + + use App::ElasticSearch::Utilities qw(es_connect); + use App::ElasticSearch::Utilities::Metrics; + + my $metrics_fetcher = App::ElasticSearch::Utilities::Metrics->new( + connection => es_connect(), + with_cluster_metrics => 1, + with_index_metrics => 1, + ); + + my $metrics = $metrics_fetcher->get_metrics(); + +=cut + +use v5.16; +use warnings; + +# VERSION + +use App::ElasticSearch::Utilities qw(es_connect); +use CLI::Helpers qw(:output); +use JSON::MaybeXS; +use Ref::Util qw(is_ref is_arrayref is_hashref); +use Types::Standard qw( ArrayRef Bool HashRef InstanceOf Int Str ); + +use Moo; +use namespace::autoclean; + +=attr connection + +An `App::ElasticSearch::Utilities::Connection` instance, or automatically +created via C. + +=cut + +has 'connection' => ( + is => 'ro', + isa => InstanceOf['App::ElasticSearch::Utilities::Connection'], + default => sub { es_connect() }, + handles => [qw(host port request)], +); + +my @_IGNORES = qw( + id attributes timestamp upms _all _shards + bundled_jdk using_bundled_jdk + mlockall pid start_time_in_millis + settings +); + +=attr ignore + +An array of metric names to ignore, in addition to the static list when parsing +the `_node/_local` stats. + +=cut + +has 'ignore' => ( + is => 'lazy', + isa => ArrayRef[Str], + default => sub { [] }, +); + +=attr node_details + +The Node details provided by the C<_nodes/_local> API. + +=cut + +has 'node_details' => ( + is => 'lazy', + isa => HashRef, + init_arg => undef, +); + +sub _build_node_details { + my ($self) = @_; + + if( my $res = $self->request('_nodes/_local')->content ) { + if( my $nodes = $res->{nodes} ) { + my ($id) = keys %{ $nodes }; + return { + %{ $nodes->{$id} }, + id => $id, + } + } + } +} + +=attr node_id + +The Node ID for the connection, will be automatically discovered + +=cut + +has 'node_id' => ( + is => 'lazy', + isa => Str, +); + +sub _build_node_id { + my ($self) = @_; + + if( my $details = $self->node_details ) { + return $details->{id}; + } + + die sprintf "unable to determine node_id for %s:%d", + $self->host, $self->port; +} + +=attr with_cluster_metrics + +Boolean, set to true to collect cluster metrics in addition to node metrics + +=cut + +has 'with_cluster_metrics' => ( + is => 'lazy', + isa => Bool, + builder => sub { + my ($self) = @_; + if( my $info = $self->node_details ) { + return !!grep { $_ eq 'master' } @{ $info->{roles} }; + } + return 0; + }, +); + + +=attr with_index_metrics + +Boolean, set to true to collect index level metrics in addition to node metrics + +=cut + +has 'with_index_metrics' => ( + is => 'lazy', + isa => Bool, + builder => sub { + my ($self) = @_; + if( my $info = $self->node_details ) { + return !!grep { /^data/ } @{ $info->{roles} }; + } + return 0; + }, +); + +=method get_metrics() + +Retrieves the metrics from the local node. + +=cut + +sub get_metrics { + my ($self) = @_; + + # Fetch Node Local Stats + my @collected = $self->collect_node_metrics(); + + push @collected, $self->collect_cluster_metrics() + if $self->with_cluster_metrics; + + push @collected, $self->collect_index_metrics() + if $self->with_index_metrics; + + # Flatten Collected and Return the Stats + return \@collected; +} + +=method collect_node_metrics() + +Returns all relevant stats from the C<_nodes/_local> API + +=cut + +sub collect_node_metrics { + my ($self) = @_; + + if( my $res = $self->request('_nodes/_local')->content ) { + return $self->_stat_collector( $res->{nodes}{$self->node_id} ); + } +} + +=method collect_cluster_metrics() + +Return all relevant stats from the C<_cluster/health> API as well as a count of +`index.blocks.*` in place. + +=cut + +sub collect_cluster_metrics { + my ($self) = @_; + + my @stats = (); + + if( my $res = $self->request('_cluster/health')->content ) { + push @stats, + { key => "cluster.nodes.total", value => $res->{number_of_nodes} } , + { key => "cluster.nodes.data", value => $res->{number_of_data_nodes} }, + { key => "cluster.shards.primary", value => $res->{active_primary_shards} }, + { key => "cluster.shards.active", value => $res->{active_shards} }, + { key => "cluster.shards.initializing", value => $res->{initializing_shards} }, + { key => "cluster.shards.relocating", value => $res->{relocating_shards} }, + { key => "cluster.shards.unassigned", value => $res->{unassigned_shards} }, + ; + } + push @stats, $self->_collect_index_blocks(); + return @stats; +} + +sub _collect_index_blocks { + my ($self) = @_; + + my @req = ( + '_settings/index.blocks.*', + { + index => '_all', + uri_param => { + flat_settings => 'true', + }, + }, + ); + + if( my $res = $self->request(@req)->content ) { + my %collected=(); + foreach my $idx ( keys %{ $res } ) { + if( my $settings = $res->{$idx}{settings} ) { + foreach my $block ( keys %{ $settings } ) { + my $value = $settings->{$block}; + if( lc $value eq 'true') { + $collected{$block} ||= 0; + $collected{$block}++; + } + } + } + } + return map { { key => "cluster.$_", value => $collected{$_} } } sort keys %collected; + } + +} + +sub collect_index_metrics { + my ($self) = @_; + + my $id = $self->node_id; + my $shardres = $self->request('_cat/shards', + { + uri_param => { + local => 'true', + format => 'json', + bytes => 'b', + h => join(',', qw( index prirep docs store id state )), + } + } + )->content; + + my %results; + foreach my $shard ( @{ $shardres } ) { + # Skip unallocated shards + next unless $shard->{id}; + + # Skip unless this shard is allocated to this shard + next unless $shard->{id} eq $id; + + # Skip "Special" Indexes + next if $shard->{index} =~ /^\./; + + # Get Metadata + my $index = $shard->{index} =~ s/[-_]\d{4}([.-])\d{2}\g{1}\d{2}(?:[-_.]\d+)?$//r; + next unless $index; + + $index =~ s/\./_/g; + + my $type = $shard->{prirep} eq 'p' ? 'primary' : 'replica'; + + # Initialize + $results{$index} ||= { map { $_ => 0 } qw( docs bytes primary replica ) }; + $results{$index}->{state} ||= {}; + $results{$index}->{state}{$shard->{state}} ||= 0; + $results{$index}->{state}{$shard->{state}}++; + + # Add it up, Add it up + $results{$index}->{docs} += $shard->{docs}; + $results{$index}->{bytes} += $shard->{store}; + $results{$index}->{$type}++; + } + + my @results; + foreach my $idx (sort keys %results) { + foreach my $k ( sort keys %{ $results{$idx} } ) { + # Skip the complex + next if ref $results{$idx}->{$k}; + push @results, + { + key => sprintf("node.indices.%s.%s", $idx, $k), + value => $results{$idx}->{$k}, + }; + } + my $states = $results{$idx}->{state} || {}; + + foreach my $k ( sort keys %{ $states } ) { + push @results, + { + key => sprintf("node.indices.%s.state.%s", $idx, $k), + value => $states->{$k}, + }; + } + } + return @results; +} + +#------------------------------------------------------------------------# +# Parse Statistics Dynamically +sub _stat_collector { + my $self = shift; + my $ref = shift; + my @path = @_; + my @stats = (); + + # Base Case + return unless is_hashref($ref); + + my %ignores = map { $_ => 1 } @{ $self->ignore }, @_IGNORES; + foreach my $key (sort keys %{ $ref }) { + # Skip uninteresting keys + next if exists $ignores{$key}; + + # Skip peak values, we'll see those in the graphs. + next if $key =~ /^peak/; + + # Sanitize Key Name + my $key_name = $key; + $key_name =~ s/(?:time_)?in_millis/ms/; + $key_name =~ s/(?:size_)?in_bytes/bytes/; + $key_name =~ s/\./_/g; + + if( is_hashref($ref->{$key}) ) { + # Recurse + push @stats, $self->_stat_collector($ref->{$key},@path,$key_name); + } + elsif( $ref->{$key} =~ /^\d+(?:\.\d+)?$/ ) { + # Numeric + push @stats, { + key => join('.',@path,$key_name), + value => $ref->{$key}, + }; + } + } + + return @stats; +} + +__PACKAGE__->meta->make_immutable; diff --git a/lib/App/ElasticSearch/Utilities/QueryString.pm b/lib/App/ElasticSearch/Utilities/QueryString.pm index 359c05c..2bdaf2e 100644 --- a/lib/App/ElasticSearch/Utilities/QueryString.pm +++ b/lib/App/ElasticSearch/Utilities/QueryString.pm @@ -1,7 +1,7 @@ package App::ElasticSearch::Utilities::QueryString; # ABSTRACT: CLI query string fixer -use strict; +use v5.16; use warnings; # VERSION diff --git a/lib/App/ElasticSearch/Utilities/QueryString/AutoEscape.pm b/lib/App/ElasticSearch/Utilities/QueryString/AutoEscape.pm index 0e9b278..b08ea08 100644 --- a/lib/App/ElasticSearch/Utilities/QueryString/AutoEscape.pm +++ b/lib/App/ElasticSearch/Utilities/QueryString/AutoEscape.pm @@ -1,7 +1,7 @@ package App::ElasticSearch::Utilities::QueryString::AutoEscape; # ABSTRACT: Provides a prefix of '=' to use the term filter -use strict; +use v5.16; use warnings; # VERSION diff --git a/lib/App/ElasticSearch/Utilities/QueryString/BareWords.pm b/lib/App/ElasticSearch/Utilities/QueryString/BareWords.pm index bbd19da..f57a0ee 100644 --- a/lib/App/ElasticSearch/Utilities/QueryString/BareWords.pm +++ b/lib/App/ElasticSearch/Utilities/QueryString/BareWords.pm @@ -1,7 +1,7 @@ package App::ElasticSearch::Utilities::QueryString::BareWords; # ABSTRACT: Mostly fixing case and tracking dangling words -use strict; +use v5.16; use warnings; # VERSION diff --git a/lib/App/ElasticSearch/Utilities/QueryString/FileExpansion.pm b/lib/App/ElasticSearch/Utilities/QueryString/FileExpansion.pm index 9e724d0..6014ef8 100644 --- a/lib/App/ElasticSearch/Utilities/QueryString/FileExpansion.pm +++ b/lib/App/ElasticSearch/Utilities/QueryString/FileExpansion.pm @@ -1,7 +1,7 @@ package App::ElasticSearch::Utilities::QueryString::FileExpansion; # ABSTRACT: Build a terms query from unique values in a column of a file -use strict; +use v5.16; use warnings; # VERSION diff --git a/lib/App/ElasticSearch/Utilities/QueryString/IP.pm b/lib/App/ElasticSearch/Utilities/QueryString/IP.pm index 8ed8952..2f9d19c 100644 --- a/lib/App/ElasticSearch/Utilities/QueryString/IP.pm +++ b/lib/App/ElasticSearch/Utilities/QueryString/IP.pm @@ -1,7 +1,7 @@ package App::ElasticSearch::Utilities::QueryString::IP; # ABSTRACT: Expand IP CIDR Notation to ES ranges -use strict; +use v5.16; use warnings; # VERSION diff --git a/lib/App/ElasticSearch/Utilities/QueryString/Nested.pm b/lib/App/ElasticSearch/Utilities/QueryString/Nested.pm index dad1192..8d04e5f 100644 --- a/lib/App/ElasticSearch/Utilities/QueryString/Nested.pm +++ b/lib/App/ElasticSearch/Utilities/QueryString/Nested.pm @@ -1,7 +1,7 @@ package App::ElasticSearch::Utilities::QueryString::Nested; # ABSTRACT: Implement the proposed Elasticsearch nested query syntax -use strict; +use v5.16; use warnings; # VERSION diff --git a/lib/App/ElasticSearch/Utilities/QueryString/Plugin.pm b/lib/App/ElasticSearch/Utilities/QueryString/Plugin.pm index 7f65ba9..132fb9e 100644 --- a/lib/App/ElasticSearch/Utilities/QueryString/Plugin.pm +++ b/lib/App/ElasticSearch/Utilities/QueryString/Plugin.pm @@ -1,7 +1,7 @@ package App::ElasticSearch::Utilities::QueryString::Plugin; # ABSTRACT: Moo::Role for implementing QueryString Plugins -use strict; +use v5.16; use warnings; # VERSION diff --git a/lib/App/ElasticSearch/Utilities/QueryString/Ranges.pm b/lib/App/ElasticSearch/Utilities/QueryString/Ranges.pm index 4c93bf3..a910aa7 100644 --- a/lib/App/ElasticSearch/Utilities/QueryString/Ranges.pm +++ b/lib/App/ElasticSearch/Utilities/QueryString/Ranges.pm @@ -1,7 +1,7 @@ package App::ElasticSearch::Utilities::QueryString::Ranges; # ABSTRACT: Implement parsing comparison operators to Equivalent Lucene syntax -use strict; +use v5.16; use warnings; # VERSION diff --git a/lib/App/ElasticSearch/Utilities/QueryString/Underscored.pm b/lib/App/ElasticSearch/Utilities/QueryString/Underscored.pm index 4e11d36..235e348 100644 --- a/lib/App/ElasticSearch/Utilities/QueryString/Underscored.pm +++ b/lib/App/ElasticSearch/Utilities/QueryString/Underscored.pm @@ -1,7 +1,7 @@ package App::ElasticSearch::Utilities::QueryString::Underscored; # ABSTRACT: Extend some __ queries -use strict; +use v5.16; use warnings; # VERSION diff --git a/lib/App/ElasticSearch/Utilities/VersionHacks.pm b/lib/App/ElasticSearch/Utilities/VersionHacks.pm index 5162c2d..fb8fca5 100644 --- a/lib/App/ElasticSearch/Utilities/VersionHacks.pm +++ b/lib/App/ElasticSearch/Utilities/VersionHacks.pm @@ -1,7 +1,7 @@ # ABSTRACT: Fix version issues to support all the things package App::ElasticSearch::Utilities::VersionHacks; -use strict; +use v5.16; use warnings; use version; diff --git a/lib/Types/ElasticSearch.pm b/lib/Types/ElasticSearch.pm index 5e16a5a..8a049cb 100644 --- a/lib/Types/ElasticSearch.pm +++ b/lib/Types/ElasticSearch.pm @@ -1,7 +1,7 @@ package Types::ElasticSearch; # ABSTRACT: Types for working with ElasticSearch -use strict; +use v5.16; use warnings; # VERSION diff --git a/scripts/es-graphite-dynamic.pl b/scripts/es-graphite-dynamic.pl index 379f463..c13583f 100755 --- a/scripts/es-graphite-dynamic.pl +++ b/scripts/es-graphite-dynamic.pl @@ -4,7 +4,8 @@ use strict; use warnings; -use App::ElasticSearch::Utilities qw(es_request es_node_stats es_index_stats es_index_strip_date es_flatten_hash); +use App::ElasticSearch::Utilities qw(es_connect); +use App::ElasticSearch::Utilities::Metrics; use CLI::Helpers qw(:all); use Getopt::Long qw(:config no_ignore_case no_ignore_case_always); use IO::Socket::INET; @@ -20,8 +21,6 @@ 'carbon-proto=s', 'carbon-server=s', 'carbon-port=i', - 'with-indices', - 'with-cluster', 'prefix=s', 'no-prefix', 'help|h', @@ -36,8 +35,9 @@ #------------------------------------------------------------------------# # Argument Sanitazation + # Ignore uninteresting metrics -my @_IGNORE = qw(id attributes timestamp upms _all _shards); +my @_IGNORE = (); push @_IGNORE, split(/,/, $opt{ignore}) if exists $opt{ignore}; my %_IGNORE = map { $_ => 1 } @_IGNORE; # Merge options into config @@ -45,14 +45,17 @@ 'carbon-proto' => 'tcp', 'carbon-base' => 'general.es', %opt, - host => App::ElasticSearch::Utilities::def('HOST'), ); #------------------------------------------------------------------------# # Globals -my $TIME = time; -my $HOSTNAME = undef; -my $CLUSTER = undef; +my $TIME = time; +my $Fetcher = App::ElasticSearch::Utilities::Metrics->new( + connection => es_connect(), + %_IGNORE ? ( + ignore => [ sort keys %_IGNORE ] + ) : (), +); #------------------------------------------------------------------------# # Carbon Socket Creation @@ -70,32 +73,18 @@ #------------------------------------------------------------------------# # Collect and Decode the Cluster Statistics -my @metrics = exists $opt{'with-cluster'} ? cluster_health() : (); -my $stats = es_node_stats('_local'); -if( !$stats ) { - output({color=>'red'}, "Error retrieving nodes_stats()"); + +my @metrics = sort map { "$_->{key} $_->{value}" } @{ $Fetcher->get_metrics }; +if( !@metrics ) { + output({color=>'red'}, "Error retrieving metrics"); exit 1; } -push @metrics, @{ parse_nodes_stats($stats) }; -# Fetch Local Shard Data -push @metrics, local_shard_data(); - -# Collect individual indexes names and their own statistics -if( exists $cfg{'with-indices'} ) { - my $data = es_index_stats('_all'); - if( defined $data ) { - push @metrics, dynamic_stat_collector($data->{indices},'cluster',$CLUSTER); - } - else { - output({color=>'red'}, "Index stats requested, but response was empty."); - } -} #------------------------------------------------------------------------# # Send output to appropriate channels for ( @metrics ) { # Format - my $prefix = exists $cfg{prefix} ? $cfg{prefix} : join('.', $cfg{'carbon-base'}, $HOSTNAME); + my $prefix = exists $cfg{prefix} ? $cfg{prefix} : join('.', $cfg{'carbon-base'}, $Fetcher->node_details->{name}); s/^/$prefix./ unless $cfg{'no-prefix'}; s/$/ $TIME\n/; @@ -108,179 +97,6 @@ output({data=>1},$_); } } - -#------------------------------------------------------------------------# -# Basic Cluster Statistics -sub cluster_health { - my $result = es_request('_cluster/health'); - my @stats =(); - if( defined $result ) { - $CLUSTER ||= $result->{cluster_name}; - push @stats, - "cluster.nodes.total $result->{number_of_nodes}", - "cluster.nodes.data $result->{number_of_data_nodes}", - "cluster.shards.primary $result->{active_primary_shards}", - "cluster.shards.active $result->{active_shards}", - "cluster.shards.initializing $result->{initializing_shards}", - "cluster.shards.relocating $result->{relocating_shards}", - "cluster.shards.unassigned $result->{unassigned_shards}", - ; - } - push @stats, index_blocks(); - return @stats; -} -#------------------------------------------------------------------------# -# Index Blocks -sub index_blocks { - my $result = es_request('_settings/index.blocks.*', { index => '_all' }); - - my %collected=(); - foreach my $idx ( keys %{ $result } ) { - if( $result->{$idx}{settings} ) { - my $settings = es_flatten_hash( $result->{$idx}{settings} ); - foreach my $block ( keys %{ $settings } ) { - my $value = $settings->{$block}; - if( lc $value eq 'true') { - $collected{$block} ||= 0; - $collected{$block}++; - } - } - } - } - - return map { "cluster.$_ $collected{$_}" } sort keys %collected; -} -#------------------------------------------------------------------------# -# Local Shard Data -sub local_shard_data { - # Retrieve our local node id - my $result = es_request('_nodes/_local'); - return unless $result->{nodes}; - - my ($id) = keys %{ $result->{nodes} }; - - return unless $id; - - my $shardres = es_request('_cat/shards', - { - uri_param => { - local => 'true', - format => 'json', - bytes => 'b', - h => join(',', qw( index prirep docs store id state )), - } - } - ); - - my %results; - foreach my $shard ( @{ $shardres } ) { - # Skip unallocated shards - next unless $shard->{id}; - - # Skip unless this shard is allocated to this shard - next unless $shard->{id} eq $id; - - # Skip "Special" Indexes - next if $shard->{index} =~ /^\./; - - # Get Metadata - my $index = es_index_strip_date( $shard->{index} ); - next unless $index; - - $index =~ s/\./_/g; - - my $type = $shard->{prirep} eq 'p' ? 'primary' : 'replica'; - - # Initialize - $results{$index} ||= { map { $_ => 0 } qw( docs bytes primary replica ) }; - $results{$index}->{state} ||= {}; - $results{$index}->{state}{$shard->{state}} ||= 0; - $results{$index}->{state}{$shard->{state}}++; - - # Add it up, Add it up - $results{$index}->{docs} += $shard->{docs}; - $results{$index}->{bytes} += $shard->{store}; - $results{$index}->{$type}++; - } - - my @results; - foreach my $idx (sort keys %results) { - foreach my $k ( sort keys %{ $results{$idx} } ) { - # Skip the complex - next if ref $results{$idx}->{$k}; - push @results, - sprintf "node.indices.%s.%s %d", - $idx, $k, $results{$idx}->{$k}; - } - my $states = $results{$idx}->{state} || {}; - - foreach my $k ( sort keys %{ $states } ) { - push @results, - sprintf "node.indices.%s.state.%s %d", - $idx, $k, $states->{$k}; - } - } - return @results; - -} -#------------------------------------------------------------------------# -# Parse Statistics Dynamically -sub dynamic_stat_collector { - my $ref = shift; - my @path = @_; - my @stats = (); - - # Base Case - return unless is_hashref($ref); - - foreach my $key (sort keys %{ $ref }) { - # Skip uninteresting keys - next if exists $_IGNORE{$key}; - - # Skip peak values, we'll see those in the graphs. - next if $key =~ /^peak/; - - # Sanitize Key Name - my $key_name = $key; - $key_name =~ s/(?:time_)?in_millis/ms/; - $key_name =~ s/(?:size_)?in_bytes/bytes/; - $key_name =~ s/\./_/g; - - if( is_hashref($ref->{$key}) ) { - # Recurse - push @stats, dynamic_stat_collector($ref->{$key},@path,$key_name); - } - elsif( $ref->{$key} =~ /^\d+(?:\.\d+)?$/ ) { - # Numeric - push @stats, join('.',@path,$key_name) . " $ref->{$key}"; - } - } - - return @stats; -} - -#------------------------------------------------------------------------# -# Generate Nodes Statistics -sub parse_nodes_stats { - my $data = shift; - - # We are using _local, so we'll only have our target - # nodes data in the results, using the loop to grab - # the node_id, which is hashed. - my $node_id; - foreach my $id (keys %{ $data->{nodes} }) { - $node_id = $id; - $HOSTNAME=$data->{nodes}{$id}{name}; - last; - } - $CLUSTER ||= $data->{cluster_name}; - verbose("[$CLUSTER] Parsing node_stats for ID:$node_id => $HOSTNAME"); - my $node = $data->{nodes}{$node_id}; - - my @stats = dynamic_stat_collector($node); - return \@stats; -} - __END__ =head1 SYNOPSIS @@ -330,11 +146,6 @@ =head1 OPTIONS Use this port for the carbon server, useless without --carbon-server -=item B - -Also grab data at the individual index level, will not append hostnames as this is useless. It will -map the data into "$CARBON_BASE.cluster.$CLUSTERNAME.$INDEX..." - =item B A comma separated list of keys to ignore when parsing the tree. This is in addition to the diff --git a/scripts/es-index-fields.pl b/scripts/es-index-fields.pl new file mode 100755 index 0000000..655f2e1 --- /dev/null +++ b/scripts/es-index-fields.pl @@ -0,0 +1,150 @@ +#!perl +# PODNAME: es-index-fields.pl +# ABSTRACT: Show information on the fields storage usage +use strict; +use warnings; + +use App::ElasticSearch::Utilities qw(:all); +use CLI::Helpers qw(:all); +use Const::Fast; +use JSON::MaybeXS; +use Getopt::Long::Descriptive; +use Pod::Usage; + +#------------------------------------------------------------------------# +# Argument Collection +const my %DEFAULT => ( + duration => 'transient', + top => 10, +); +my ($opt,$usage) = describe_options('%c %o', + ['top|limit|size|n=i', "Show the top N fields, defaults to $DEFAULT{top}", + { default => $DEFAULT{top} }, + ], + ['no-meta-fields|N', "Disable showing meta fields starting with an underscore"], + [] , + ['help', 'Display this message', { shortcircuit => 1 }], + ['manual', 'Display full manual', { shortcircuit => 1 }], +); + +#------------------------------------------------------------------------# +# Documentations! +if( $opt->help ) { + print $usage->text; + exit 0; +} +pod2usage(-exitstatus => 0, -verbose => 2) if $opt->manual; + +#------------------------------------------------------------------------# +my $json = JSON->new->pretty->utf8->canonical; + +my %indices = map { $_ => (es_index_days_old($_) || 0) } es_indices(); + +const my @FieldStores => qw( + doc_values + norms + stored_fields + term_vectors + points +); + +my %Fields = (); + +foreach my $idx ( sort keys %indices ) { + output({clear=>1, color=>"cyan"}, "Getting field data for $idx"); + + # Get Field Data + my $result; + eval { + $result = es_request('_disk_usage', { + method => 'POST', + index => $idx, + uri_param => { + run_expensive_tasks => 'true' + }, + }); + 1; + } or do { + my $err = $@; + output({indent=>1, color=>'red'}, "Request Failed: $err"); + }; + $result //= {}; + + if( my $fields = $result->{$idx}{fields} ) { + my $by_size = sub { + $fields->{$b}{total_in_bytes} <=> $fields->{$a}{total_in_bytes} + }; + my $n = 0; + foreach my $field ( sort $by_size keys %{ $fields } ) { + # Skip meta fields + next if $opt->no_meta_fields && $field =~ /^_/; + + # Collect field totals + my $data = $fields->{$field}; + $Fields{$field} += $data->{total_in_bytes}; + + # Skip the output, but collect all the datas + $n++; + next if $n > $opt->top; + + output({indent=>1,kv=>1,color=>color_pick($data->{total_in_bytes})}, $field => $data->{total}); + foreach my $k ( @FieldStores ) { + if( $data->{"${k}_in_bytes"} > 0 ) { + output({indent=>2,kv=>1,color=>color_pick($data->{"${k}_in_bytes"})}, $k => $data->{$k} ); + } + } + } + } + else { + output({indent=>1, color=>'red'}, "Failed retrieving field storage information"); + } + + if ( my $totals = $result->{$idx}{all_fields} ) { + output({clear=>1,indent=>1,color=>'cyan'}, "All Fields ($idx):"); + output({indent=>2,kv=>1,color=>color_pick($totals->{total_in_bytes})}, total => $totals->{total}); + + foreach my $k ( @FieldStores ) { + if( $totals->{"${k}_in_bytes"} > 0 ) { + output({indent=>3,kv=>1,color=>color_pick($totals->{"${k}_in_bytes"})}, $k => $totals->{$k} ); + } + } + } +} + +output({clear=>1,color=>'yellow'}, "Totals for fields in all indexes"); +my $n = 0; +foreach my $k ( sort { $Fields{$b} <=> $Fields{$a} } keys %Fields ) { + output({indent=>1,kv=>1,color=>color_pick($Fields{$k})}, $k, es_human_size($Fields{$k})); + $n++; + last if $n >= $opt->top; +} + +sub color_pick { + my ($v) = @_; + return + $v > 1024 * 1024 * 1024 * 10 ? 'red' + : $v > 1024 * 1024 * 1024 ? 'yellow' + : 'green'; +} + +__END__ + +=head1 SYNOPSIS + +es-index-fields.pl --index my-index-001 + +Options: + + + --help print help + --manual print full manual + +=from_other App::ElasticSearch::Utilities / ARGS / all + +=from_other CLI::Helpers / ARGS / all + +=head1 DESCRIPTION + +This script allows you to see the storage usage by field + +=cut diff --git a/scripts/es-index-scan.pl b/scripts/es-index-scan.pl new file mode 100644 index 0000000..654a3c8 --- /dev/null +++ b/scripts/es-index-scan.pl @@ -0,0 +1,50 @@ +#!perl +# PODNAME: es-index-scan.pl +# ABSTRACT: Scan indexes for potential issues +use v5.16; +use warnings; + +use App::ElasticSearch::Utilities qw(:all); +use CLI::Helpers qw(:output); + +my $indexes = es_indices(check_dates => 0); + +foreach my $idx ( sort @{ $indexes } ) { + my $age = es_index_days_old( $idx ); + my $result = es_request("/$idx/_stats"); + my $stats = $result->{indices}{$idx}{primaries}; + + my $doc_size = es_human_count($stats->{docs}{count} || 0); + my $size = es_human_size($stats->{store}{size_in_bytes}); + output("checking $idx.. (age=${age}d, docs=$doc_size, size=$size)"); + + my $segments = $stats->{segments}{count}; + my $shards = $stats->{shard_stats}{total_count}; + + if( my $docs = $stats->{docs}{count} ) { + my $deleted = $stats->{docs}{deleted}; + output({indent=>1,color=>'blue'}, sprintf "%d of %d (%0.1f%%) documents deleted", + $deleted, $docs, ($deleted / $docs) * 100, + ) if $deleted; + } + else { + output({indent=>1,color=>'red'}, "no documents") + } + + output({indent=>1,color=>'red'}, "More than one segment per shard: segments=$segments, shards=$shards") + if $age > 1 && $segments > $shards; + + + output({indent=>1,color=>'magenta'}, "index never queried") + unless $stats->{search}{query_total}; + + output({indent=>1,color=>'yellow'}, "$stats->{indexing}{index_failed} index failures") + if $stats->{indexing}{index_failed}; + + output({indent=>1,color=>'red'}, "indexing throttled") + if $stats->{indexing}{is_throttled}; + + output({indent=>1,color=>'yellow'}, sprintf "%0.3fs of throttled indexing", $stats->{indexing}{throttle_time_in_millis} / 1000) + if $stats->{indexing}{throttle_time_in_millis}; +} + diff --git a/t/04-aggregate.t b/t/04-aggregate-expand.t similarity index 100% rename from t/04-aggregate.t rename to t/04-aggregate-expand.t diff --git a/t/05-aggregate-flatten.t b/t/05-aggregate-flatten.t new file mode 100644 index 0000000..6013658 --- /dev/null +++ b/t/05-aggregate-flatten.t @@ -0,0 +1,74 @@ +#!perl +# +use strict; +use warnings; + +use App::ElasticSearch::Utilities::Aggregations; +use CLI::Helpers qw(:output); +use Data::Dumper; +use Test::More; +use YAML::XS qw(); + +$Data::Dumper::Indent = 1; +$Data::Dumper::Sortkeys = 1; + +# Aggregation Flattening Tests +my $tests = YAML::XS::Load(join '', ); + +foreach my $t (sort keys %{ $tests }) { + my $flat = es_flatten_aggs( $tests->{$t}{aggregations} ); + + is_deeply( $flat, $tests->{$t}{expected}, $t ) + or diag( Dumper $flat ); +} +done_testing(); + +__DATA__ +--- +00-simple_terms_agg: + aggregations: + ip: + buckets: + - key: 1.2.3.4 + doc_count: 13 + expected: + - + - ip + - 1.2.3.4 + - ip.hits + - 13 +01-simple_embedded_terms: + aggregations: + ip: + buckets: + - key: 1.2.3.4 + doc_count: 13 + ports: + buckets: + - key: 53 + doc_count: 13 + expected: + - + - ip + - 1.2.3.4 + - ip.hits + - 13 + - ports + - 53 + - ports.hits + - 13 +00-simple_terms_agg_key_as_string: + aggregations: + ip: + buckets: + - key: 1.2.3.4 + key_as_string: "one dot two dot three dot four" + doc_count: 13 + expected: + - + - ip + - one dot two dot three dot four + - ip.raw + - 1.2.3.4 + - ip.hits + - 13