From 38ac863fbb51806ba40f48ce13ff3476a00b01be Mon Sep 17 00:00:00 2001 From: Darun Seethammagari Date: Wed, 24 Jul 2024 05:15:39 +0530 Subject: [PATCH] feat: Add metric for receiver block backfill failures (#912) Adds a metric for tracking receiver block backfill status. Since the backfill has an end state, I can't rely on constantly incrementing a counter. Instead, I use a gauge and check for nonzero values. If the backfill starts and completes without issue, the value will be 0. But, if the backfill fails, it will increment the gauge and then skip to Lake backfill, leaving the gauge at a nonzero value. We can alert on this. If the stream is restarted, then a successful attempt at the backfill will reset the gauge to 0. --- block-streamer/src/block_stream.rs | 8 ++++++++ block-streamer/src/graphql/client.rs | 6 ++++++ block-streamer/src/metrics.rs | 6 ++++++ 3 files changed, 20 insertions(+) diff --git a/block-streamer/src/block_stream.rs b/block-streamer/src/block_stream.rs index eff2a0d1..a9af3f10 100644 --- a/block-streamer/src/block_stream.rs +++ b/block-streamer/src/block_stream.rs @@ -409,9 +409,14 @@ async fn process_bitmap_indexer_blocks( let mut last_published_block_height: u64 = start_block_height; + let indexer_name = indexer.get_full_name(); + while let Some(block_height_result) = matching_block_heights.next().await { match block_height_result { Ok(block_height) => { + metrics::RECEIVER_BLOCKS_FAILURE + .with_label_values(&[&indexer_name]) + .set(0); redis .publish_block(indexer, redis_stream.clone(), block_height, MAX_STREAM_SIZE) .await?; @@ -422,6 +427,9 @@ async fn process_bitmap_indexer_blocks( last_published_block_height = block_height; } Err(err) => { + metrics::RECEIVER_BLOCKS_FAILURE + .with_label_values(&[&indexer_name]) + .inc(); tracing::error!( "Backfill using bitmap indexer failed unexpectedly: {:?}", err diff --git a/block-streamer/src/graphql/client.rs b/block-streamer/src/graphql/client.rs index 38aad637..516f2b3c 100644 --- 
a/block-streamer/src/graphql/client.rs +++ b/block-streamer/src/graphql/client.rs @@ -57,6 +57,12 @@ impl GraphQLClientImpl { .json(&body) .send() .await?; + if reqwest_response.status() != 200 { + tracing::error!( + "GraphQL query failed with status code: {}", + reqwest_response.status() + ); + } reqwest_response.json().await } diff --git a/block-streamer/src/metrics.rs b/block-streamer/src/metrics.rs index 1f01a071..ec166e83 100644 --- a/block-streamer/src/metrics.rs +++ b/block-streamer/src/metrics.rs @@ -63,6 +63,12 @@ lazy_static! { &["indexer"] ) .unwrap(); + pub static ref RECEIVER_BLOCKS_FAILURE: IntGaugeVec = register_int_gauge_vec!( + "queryapi_block_streamer_receiver_blocks_failure", + "Gauge which only has a nonzero value if an error occurs during receiver block backfill", + &["indexer"] + ) + .unwrap(); } pub struct LogCounter;