From 0de5018a6171367a70f16bb89f80d2b898119f40 Mon Sep 17 00:00:00 2001 From: WenyXu Date: Thu, 7 Nov 2024 06:24:11 +0000 Subject: [PATCH] chore: add more logs --- .../src/engine/region_metadata.rs | 53 +++++++++++++------ src/mito2/src/metrics.rs | 2 + src/mito2/src/sst/parquet/helper.rs | 19 +++++-- src/store-api/src/metadata.rs | 11 ++-- 4 files changed, 64 insertions(+), 21 deletions(-) diff --git a/src/metric-engine/src/engine/region_metadata.rs b/src/metric-engine/src/engine/region_metadata.rs index 171480c58978..0a85427bad3c 100644 --- a/src/metric-engine/src/engine/region_metadata.rs +++ b/src/metric-engine/src/engine/region_metadata.rs @@ -14,6 +14,7 @@ //! Implementation of retrieving logical region's region metadata. +use common_telemetry::info; use store_api::metadata::ColumnMetadata; use store_api::storage::RegionId; @@ -21,25 +22,11 @@ use crate::engine::MetricEngineInner; use crate::error::Result; impl MetricEngineInner { - /// Load column metadata of a logical region. - /// - /// The return value is ordered on column name. - pub async fn load_logical_columns( + async fn get_logical_columns( &self, physical_region_id: RegionId, logical_region_id: RegionId, ) -> Result> { - // First try to load from state cache - if let Some(columns) = self - .state - .read() - .unwrap() - .logical_columns() - .get(&logical_region_id) - { - return Ok(columns.clone()); - } - // Else load from metadata region and update the cache. let _read_guard = self .metadata_region @@ -65,6 +52,42 @@ impl MetricEngineInner { Ok(logical_column_metadata) } + /// Load column metadata of a logical region. + /// + /// The return value is ordered on column name. + pub async fn load_logical_columns( + &self, + physical_region_id: RegionId, + logical_region_id: RegionId, + ) -> Result> { + // First try to load from state cache + if logical_region_id == RegionId::new(1258, 0) { + let columns_without_cache = self + .get_logical_columns(physical_region_id, logical_region_id) + .await?; + info!( + "The column metadata without cache: {:?}", + columns_without_cache + ); + } + + if let Some(columns) = self + .state + .read() + .unwrap() + .logical_columns() + .get(&logical_region_id) + { + if logical_region_id == RegionId::new(1258, 0) { + info!("The column metadata in cache: {:?}", columns); + } + return Ok(columns.clone()); + } + + self.get_logical_columns(physical_region_id, logical_region_id) + .await + } + /// Load logical column names of a logical region. /// /// The return value is ordered on column name alphabetically. diff --git a/src/mito2/src/metrics.rs b/src/mito2/src/metrics.rs index bc110494cf47..d9a953301e7b 100644 --- a/src/mito2/src/metrics.rs +++ b/src/mito2/src/metrics.rs @@ -73,6 +73,8 @@ lazy_static! { /// Histogram of flushed bytes. pub static ref FLUSH_BYTES_TOTAL: IntCounter = register_int_counter!("greptime_mito_flush_bytes_total", "mito flush bytes total").unwrap(); + pub static ref READ_BYTES_TOTAL: IntCounterVec = + register_int_counter_vec!("greptime_mito_read_bytes_total", "mito read bytes total", &[TYPE_LABEL]).unwrap(); // ------ End of flush related metrics diff --git a/src/mito2/src/sst/parquet/helper.rs b/src/mito2/src/sst/parquet/helper.rs index e80f751af982..cd23eac7796d 100644 --- a/src/mito2/src/sst/parquet/helper.rs +++ b/src/mito2/src/sst/parquet/helper.rs @@ -25,6 +25,7 @@ use snafu::ResultExt; use crate::error; use crate::error::Result; +use crate::metrics::READ_BYTES_TOTAL; // Refer to https://github.com/apache/arrow-rs/blob/7e134f4d277c0b62c27529fc15a4739de3ad0afd/parquet/src/file/footer.rs#L74-L90 /// Convert [format::FileMetaData] to [ParquetMetaData] @@ -100,7 +101,10 @@ pub async fn fetch_byte_ranges( object_store: ObjectStore, ranges: &[Range], ) -> object_store::Result> { - Ok(object_store + let schema = object_store.info().scheme().into_static(); + let mut total = 0; + + let bytes = object_store .reader_with(file_path) .concurrent(FETCH_PARALLELISM) .gap(MERGE_GAP) @@ -108,6 +112,15 @@ pub async fn fetch_byte_ranges( .fetch(ranges.to_vec()) .await? .into_iter() - .map(|buf| buf.to_bytes()) - .collect::>()) + .map(|buf| { + total += buf.len(); + buf.to_bytes() + }) + .collect::>(); + + READ_BYTES_TOTAL + .with_label_values(&[schema]) + .inc_by(total as u64); + + Ok(bytes) } diff --git a/src/store-api/src/metadata.rs b/src/store-api/src/metadata.rs index 039ba4885f3b..1d0fa44015a0 100644 --- a/src/store-api/src/metadata.rs +++ b/src/store-api/src/metadata.rs @@ -27,7 +27,7 @@ use api::v1::SemanticType; use common_error::ext::ErrorExt; use common_error::status_code::StatusCode; use common_macro::stack_trace_debug; -use common_telemetry::info; +use common_telemetry::{error, info}; use datatypes::arrow::datatypes::FieldRef; use datatypes::schema::{ColumnSchema, Schema, SchemaRef}; use serde::de::Error; @@ -336,12 +336,13 @@ impl RegionMetadata { // is unique in `Schema` so we only check column id here. if id_names.contains_key(&col.column_id) { info!( - "The column {} and {} have the same column id {}, region_id: {}, schema: {:?}", + "The column {} and {} have the same column id {}, region_id: {}, schema: {:?}, column metadatas: {:?}", id_names[&col.column_id], col.column_schema.name, col.column_id, self.region_id, self.schema, + self.column_metadatas, ); } ensure!( @@ -543,7 +544,11 @@ impl RegionMetadataBuilder { /// Consumes the builder and build a [RegionMetadata]. pub fn build(self) -> Result { - let skipped = SkippedFields::new(&self.column_metadatas)?; + let skipped = SkippedFields::new(&self.column_metadatas).inspect_err(|err| { + error!(err; "Skipped fields, column_metadatas: {:?}", + self.column_metadatas + ); + })?; let meta = RegionMetadata { schema: skipped.schema,