From b8f619b11fa51a7c86c05b11d7b45451d5dc0b5e Mon Sep 17 00:00:00 2001 From: sundengyu Date: Mon, 23 Sep 2024 14:12:21 +0800 Subject: [PATCH] feat(kstat): enable kstat print Signed-off-by: sundengyu --- include/libuzfs.h | 6 +- include/sync_ops.h | 18 +- include/sys/spa_impl.h | 1 + include/sys/vdev_impl.h | 1 + include/sys/zfs_context.h | 23 ++- include/sys/zio.h | 2 + include/sys/zio_impl.h | 2 + lib/libspl/assert.c | 3 +- lib/libspl/include/sys/kstat.h | 28 +++ lib/libuzfs/libuzfs.c | 32 +++- lib/libzpool/kernel.c | 315 +++++++++++++++++++++++++++++++-- module/zfs/spa_stats.c | 10 ++ module/zfs/vdev.c | 2 + module/zfs/zio.c | 28 ++- 14 files changed, 448 insertions(+), 23 deletions(-) diff --git a/include/libuzfs.h b/include/libuzfs.h index df2fe0a13a7a..013bfd7c3173 100644 --- a/include/libuzfs.h +++ b/include/libuzfs.h @@ -102,7 +102,7 @@ extern void libuzfs_set_ops(const coroutine_ops_t *, const co_mutex_ops_t *, const co_cond_ops_t *, const co_rwlock_ops_t *, const aio_ops_t *, const thread_ops_t *, const taskq_ops_t *, - void (*print_log)(const char *, int)); + const stat_ops_t *); // only have effect when in debug binary extern void libuzfs_enable_debug_msg(void); @@ -132,7 +132,7 @@ extern int libuzfs_zpool_prop_get(libuzfs_zpool_handle_t *zhp, extern int libuzfs_dataset_create(const char *dsname); extern void libuzfs_dataset_destroy(const char *dsname); extern libuzfs_dataset_handle_t *libuzfs_dataset_open(const char *dsname, - int *err, uint32_t dnodesize, uint32_t max_blksz); + int *err, uint32_t dnodesize, uint32_t max_blksz, const void *metrics); extern void libuzfs_dataset_close(libuzfs_dataset_handle_t *dhp); extern uint64_t libuzfs_dataset_get_superblock_ino( @@ -295,6 +295,8 @@ extern int libuzfs_object_next_block(libuzfs_inode_handle_t *ihp, uint64_t *offset, uint64_t *size); extern void libuzfs_debug_main(int argc, char **argv); + +extern void libuzfs_show_stats(void *, int, const seq_file_generator_t *); #ifdef __cplusplus } #endif diff --git 
a/include/sync_ops.h b/include/sync_ops.h index 421b4de51c0b..a31a23f7c37b 100644 --- a/include/sync_ops.h +++ b/include/sync_ops.h @@ -124,13 +124,11 @@ typedef struct aio_ops { typedef uint64_t uthread_create_func_t(void (*)(void *), void *, int); typedef void uthread_exit_func_t(void); typedef void uthread_join_func_t(uint64_t); -typedef void backtrace_func_t(void); typedef struct thread_ops { uthread_create_func_t *uthread_create; uthread_exit_func_t *uthread_exit; uthread_join_func_t *uthread_join; - backtrace_func_t *backtrace; } thread_ops_t; typedef void *taskq_create_func_t(const char *, int); @@ -161,4 +159,20 @@ typedef struct taskq_ops { taskq_nalloc_func_t *taskq_nalloc; } taskq_ops_t; +typedef void print_log_func_t(const char *, int); +typedef void kstat_install_func_t(const char *, void *, int); +typedef void kstat_uninstall_func_t(const char *); +typedef void backtrace_func_t(void); +typedef void record_txg_delay_func_t(const void *, int, uint64_t); +typedef void record_zio_func_t(const void *, const int64_t *, int); + +typedef struct stat_ops { + print_log_func_t *print_log; + kstat_install_func_t *kstat_install; + kstat_uninstall_func_t *kstat_uinstall; + backtrace_func_t *backtrace; + record_txg_delay_func_t *record_txg_delays; + record_zio_func_t *record_zio; +} stat_ops_t; + #endif diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h index c8987e2e67e9..0f32581219a1 100644 --- a/include/sys/spa_impl.h +++ b/include/sys/spa_impl.h @@ -440,6 +440,7 @@ struct spa { zfs_refcount_t spa_refcount; /* number of opens */ taskq_t *spa_upgrade_taskq; /* taskq for upgrade jobs */ + const void *metrics; /* opaque pointer to metrics */ }; extern char *spa_config_path; diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h index 3c10f0cc2bf8..f7212c2537a3 100644 --- a/include/sys/vdev_impl.h +++ b/include/sys/vdev_impl.h @@ -465,6 +465,7 @@ struct vdev { zfs_ratelimit_t vdev_delay_rl; zfs_ratelimit_t vdev_deadman_rl; zfs_ratelimit_t 
vdev_checksum_rl; + const void *metrics; }; #define VDEV_PAD_SIZE (8 << 10) diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h index 005b2f882dc8..66345d7552da 100644 --- a/include/sys/zfs_context.h +++ b/include/sys/zfs_context.h @@ -416,16 +416,26 @@ extern void kstat_set_raw_ops(kstat_t *ksp, * procfs list manipulation */ +struct procfs_list; +struct seq_file; + typedef struct procfs_list { void *pl_private; kmutex_t pl_lock; list_t pl_list; uint64_t pl_next_id; size_t pl_node_offset; + char name[KSTAT_STRLEN]; + int (*show)(struct seq_file *, void *); + int (*show_header)(struct seq_file *); + int (*clear)(struct procfs_list *); } procfs_list_t; #ifndef __cplusplus -struct seq_file { }; +struct seq_file { + char *buf; + int size; +}; void seq_printf(struct seq_file *m, const char *fmt, ...); typedef struct procfs_list_node { @@ -447,6 +457,17 @@ void procfs_list_destroy(procfs_list_t *procfs_list); void procfs_list_add(procfs_list_t *procfs_list, void *p); #endif +#define KSTAT_PROCFS 1 +#define KSTAT_NORMAL 2 + +typedef struct seq_file_generator { + void (*generate)(void *, struct seq_file *); + void *arg; +} seq_file_generator_t; + +void show_procfs_content(procfs_list_t *, const seq_file_generator_t *); +void show_kstat_content(kstat_t *, const seq_file_generator_t *); + /* * Task queues */ diff --git a/include/sys/zio.h b/include/sys/zio.h index c4e7507b2fd2..7106873c9577 100644 --- a/include/sys/zio.h +++ b/include/sys/zio.h @@ -482,6 +482,8 @@ struct zio { metaslab_class_t *io_metaslab_class; /* dva throttle class */ uint64_t io_offset; + /* start time of each io stage */ + int64_t io_stage_start[ZIO_PIPELINE_STAGES]; hrtime_t io_timestamp; /* submitted at */ hrtime_t io_queued_timestamp; hrtime_t io_target_timestamp; diff --git a/include/sys/zio_impl.h b/include/sys/zio_impl.h index 4c998571653a..c5a88fe45fff 100644 --- a/include/sys/zio_impl.h +++ b/include/sys/zio_impl.h @@ -151,6 +151,8 @@ enum zio_stage { ZIO_STAGE_DONE = 1 << 24 /* 
RWFCI */ }; +#define ZIO_PIPELINE_STAGES 25 + #define ZIO_INTERLOCK_STAGES \ (ZIO_STAGE_READY | \ ZIO_STAGE_DONE) diff --git a/lib/libspl/assert.c b/lib/libspl/assert.c index 3b2b8eb5d213..00be2e599dff 100644 --- a/lib/libspl/assert.c +++ b/lib/libspl/assert.c @@ -29,6 +29,7 @@ int aok = 0; extern thread_ops_t thread_ops; +extern stat_ops_t stat_ops; /* printf version of libspl_assert */ void @@ -46,7 +47,7 @@ libspl_assertf(const char *file, const char *func, int line, return; } - thread_ops.backtrace(); + stat_ops.backtrace(); abort(); } diff --git a/lib/libspl/include/sys/kstat.h b/lib/libspl/include/sys/kstat.h index f73fb92eb797..f0db71c76306 100644 --- a/lib/libspl/include/sys/kstat.h +++ b/lib/libspl/include/sys/kstat.h @@ -61,6 +61,8 @@ typedef int kid_t; /* unique kstat id */ #define KSTAT_STRLEN 255 /* 254 chars + NULL; must be 16 * n - 1 */ + +#ifdef _KERNEL /* * The generic kstat header */ @@ -91,6 +93,32 @@ typedef struct kstat { int (*ks_snapshot)(struct kstat *, void *, int); void *ks_lock; /* protects this kstat's data */ } kstat_t; +#else + +struct kstat; + +typedef struct kstat_raw_ops { + int (*headers)(char *buf, size_t size); + int (*data)(char *buf, size_t size, void *data); + void *(*addr)(struct kstat *ksp, loff_t index); +} kstat_raw_ops_t; + +typedef struct kstat { + char ks_name[KSTAT_STRLEN]; /* kstat name */ + uchar_t ks_type; /* kstat data type */ + uchar_t ks_flags; /* kstat flags */ + void *ks_data; /* kstat type-specific data */ + uint_t ks_ndata; /* # of type-specific data records */ + size_t ks_data_size; /* total size of kstat data section */ + kstat_raw_ops_t raw_ops; + /* + * Fields relevant to kernel only + */ + int (*ks_update)(struct kstat *, int); /* dynamic update */ + void *ks_private; /* arbitrary provider-private data */ + void *ks_lock; /* protects this kstat's data */ +} kstat_t; +#endif #ifdef _SYSCALL32 diff --git a/lib/libuzfs/libuzfs.c b/lib/libuzfs/libuzfs.c index cadd370e455f..b453280ce078 100644 --- 
a/lib/libuzfs/libuzfs.c +++ b/lib/libuzfs/libuzfs.c @@ -278,14 +278,14 @@ fatal(int do_perror, char *message, ...) } extern aio_ops_t aio_ops; -extern void (*print_log)(const char *, int); +extern stat_ops_t stat_ops; void libuzfs_set_ops(const coroutine_ops_t *co, const co_mutex_ops_t *mo, const co_cond_ops_t *condo, const co_rwlock_ops_t *ro, const aio_ops_t *ao, const thread_ops_t *tho, const taskq_ops_t *tqo, - void (*pl)(const char *, int)) + const stat_ops_t *sto) { co_ops = *co; co_mutex_ops = *mo; @@ -294,7 +294,23 @@ libuzfs_set_ops(const coroutine_ops_t *co, aio_ops = *ao; thread_ops = *tho; taskq_ops = *tqo; - print_log = pl; + stat_ops = *sto; +} + +void +libuzfs_show_stats(void *stat, int type, + const seq_file_generator_t *generator) +{ + switch (type) { + case KSTAT_PROCFS: + show_procfs_content(stat, generator); + break; + case KSTAT_NORMAL: + show_kstat_content(stat, generator); + break; + default: + panic("unknown stat type: %d\n", type); + } } static uint64_t @@ -1338,8 +1354,8 @@ libuzfs_dhp_fini(libuzfs_dataset_handle_t *dhp) } libuzfs_dataset_handle_t * -libuzfs_dataset_open(const char *dsname, int *err, - uint32_t dnodesize, uint32_t max_blksz) +libuzfs_dataset_open(const char *dsname, int *err, uint32_t dnodesize, + uint32_t max_blksz, const void *metrics) { libuzfs_dataset_handle_t *dhp = NULL; objset_t *os = NULL; @@ -1354,6 +1370,12 @@ libuzfs_dataset_open(const char *dsname, int *err, return (NULL); } + if (metrics != NULL) { + os->os_spa->metrics = metrics; + vdev_t *root_vdev = os->os_spa->spa_root_vdev; + root_vdev->metrics = metrics; + } + libuzfs_dhp_init(dhp, os, dnodesize); return (dhp); diff --git a/lib/libzpool/kernel.c b/lib/libzpool/kernel.c index f29649b18489..8b170afd0e0d 100644 --- a/lib/libzpool/kernel.c +++ b/lib/libzpool/kernel.c @@ -24,6 +24,9 @@ * Copyright (c) 2016 Actifio, Inc. All rights reserved. 
*/ +#include "sys/kmem.h" +#include "sys/kstat.h" +#include "sys/list.h" #include "sys/stdtypes.h" #include #include @@ -32,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -77,6 +81,7 @@ co_mutex_ops_t co_mutex_ops = {NULL}; co_cond_ops_t co_cond_ops = {NULL}; co_rwlock_ops_t co_rwlock_ops = {NULL}; thread_ops_t thread_ops = {NULL}; +stat_ops_t stat_ops = {NULL}; /* * ========================================================================= @@ -162,25 +167,282 @@ kstat_t * kstat_create(const char *module, int instance, const char *name, const char *class, uchar_t type, ulong_t ndata, uchar_t ks_flag) { - return (NULL); + kstat_t *ks = kmem_zalloc(sizeof (kstat_t), KM_SLEEP); + snprintf(ks->ks_name, KSTAT_STRLEN, "%s/%s/%s", module, class, name); + ks->ks_type = type; + ks->ks_flags = ks_flag; + ks->ks_ndata = ndata; + switch (type) { + case KSTAT_TYPE_RAW: + ks->ks_ndata = 1; + ks->ks_data_size = ndata; + break; + case KSTAT_TYPE_NAMED: + ks->ks_data_size = ndata * sizeof (kstat_named_t); + break; + case KSTAT_TYPE_INTR: + ks->ks_data_size = ndata * sizeof (kstat_intr_t); + break; + case KSTAT_TYPE_IO: + ks->ks_data_size = ndata * sizeof (kstat_io_t); + break; + case KSTAT_TYPE_TIMER: + ks->ks_data_size = ndata * sizeof (kstat_timer_t); + break; + default: + panic("Undefined kstat type %d\n", type); + } + + VERIFY((ks_flag & KSTAT_FLAG_VIRTUAL) != 0); + + return (ks); } /*ARGSUSED*/ void kstat_install(kstat_t *ksp) -{} +{ + stat_ops.kstat_install(ksp->ks_name, ksp, KSTAT_NORMAL); +} /*ARGSUSED*/ void kstat_delete(kstat_t *ksp) -{} +{ + stat_ops.kstat_uinstall(ksp->ks_name); + kmem_free(ksp, sizeof (kstat_t)); +} void kstat_set_raw_ops(kstat_t *ksp, int (*headers)(char *buf, size_t size), int (*data)(char *buf, size_t size, void *data), void *(*addr)(kstat_t *ksp, loff_t index)) -{} +{ + ksp->raw_ops.headers = headers; + ksp->raw_ops.data = data; + ksp->raw_ops.addr = addr; +} + +static void +show_kstat_header(kstat_t *ks, struct 
seq_file *sf) +{ + switch (ks->ks_type) { + case KSTAT_TYPE_RAW: + if (ks->raw_ops.headers) { + (void) ks->raw_ops.headers(sf->buf, sf->size); + } else { + seq_printf(sf, "raw data\n"); + } + break; + case KSTAT_TYPE_NAMED: + seq_printf(sf, "%-31s %-4s %s\n", + "name", "type", "data"); + break; + case KSTAT_TYPE_INTR: + seq_printf(sf, "%-8s %-8s %-8s %-8s %-8s\n", + "hard", "soft", "watchdog", + "spurious", "multsvc"); + break; + case KSTAT_TYPE_IO: + seq_printf(sf, + "%-8s %-8s %-8s %-8s %-8s %-8s " + "%-8s %-8s %-8s %-8s %-8s %-8s\n", + "nread", "nwritten", "reads", "writes", + "wtime", "wlentime", "wupdate", + "rtime", "rlentime", "rupdate", + "wcnt", "rcnt"); + break; + case KSTAT_TYPE_TIMER: + seq_printf(sf, + "%-31s %-8s " + "%-8s %-8s %-8s %-8s %-8s\n", + "name", "events", "elapsed", + "min", "max", "start", "stop"); + break; + default: + panic("Undefined kstat type %d\n", ks->ks_type); + } +} + +static int +kstat_seq_show_raw(struct seq_file *f, unsigned char *p, int l) +{ + int i, j; + + for (i = 0; ; i++) { + seq_printf(f, "%03x:", i); + + for (j = 0; j < 16; j++) { + if (i * 16 + j >= l) { + seq_printf(f, "\n"); + goto out; + } + + seq_printf(f, " %02x", (unsigned char)p[i * 16 + j]); + } + seq_printf(f, "\n"); + } +out: + return (0); +} + +static int +kstat_seq_show_named(struct seq_file *f, kstat_named_t *knp) +{ + seq_printf(f, "%-31s %-4d ", knp->name, knp->data_type); + + switch (knp->data_type) { + case KSTAT_DATA_CHAR: + knp->value.c[15] = '\0'; /* NULL terminate */ + seq_printf(f, "%-16s", knp->value.c); + break; + /* + * NOTE - We need to be more careful about what tokens are + * used for each arch, for now this is correct for x86_64. 
+ */ + case KSTAT_DATA_INT32: + seq_printf(f, "%d", knp->value.i32); + break; + case KSTAT_DATA_UINT32: + seq_printf(f, "%u", knp->value.ui32); + break; + case KSTAT_DATA_INT64: + seq_printf(f, "%lld", (signed long long)knp->value.i64); + break; + case KSTAT_DATA_UINT64: + seq_printf(f, "%llu", + (unsigned long long)knp->value.ui64); + break; + case KSTAT_DATA_STRING: + KSTAT_NAMED_STR_PTR(knp) + [KSTAT_NAMED_STR_BUFLEN(knp)-1] = '\0'; + seq_printf(f, "%s", KSTAT_NAMED_STR_PTR(knp)); + break; + default: + panic("Undefined kstat data type %d\n", knp->data_type); + } + + seq_printf(f, "\n"); + + return (0); +} + +static int +kstat_seq_show_intr(struct seq_file *f, kstat_intr_t *kip) +{ + seq_printf(f, "%-8u %-8u %-8u %-8u %-8u\n", + kip->intrs[KSTAT_INTR_HARD], + kip->intrs[KSTAT_INTR_SOFT], + kip->intrs[KSTAT_INTR_WATCHDOG], + kip->intrs[KSTAT_INTR_SPURIOUS], + kip->intrs[KSTAT_INTR_MULTSVC]); + + return (0); +} + +static int +kstat_seq_show_io(struct seq_file *f, kstat_io_t *kip) +{ + /* though wlentime & friends are signed, they will never be negative */ + seq_printf(f, + "%-8llu %-8llu %-8u %-8u %-8llu %-8llu " + "%-8llu %-8llu %-8llu %-8llu %-8u %-8u\n", + kip->nread, kip->nwritten, + kip->reads, kip->writes, + kip->wtime, kip->wlentime, kip->wlastupdate, + kip->rtime, kip->rlentime, kip->rlastupdate, + kip->wcnt, kip->rcnt); + + return (0); +} + +static int +kstat_seq_show_timer(struct seq_file *f, kstat_timer_t *ktp) +{ + seq_printf(f, + "%-31s %-8llu %-8llu %-8llu %-8llu %-8llu %-8llu\n", + ktp->name, ktp->num_events, ktp->elapsed_time, + ktp->min_time, ktp->max_time, + ktp->start_time, ktp->stop_time); + + return (0); +} + +static int +show_kstat_line(kstat_t *ks, struct seq_file *sf, int n) +{ + void *data = NULL; + switch (ks->ks_type) { + case KSTAT_TYPE_RAW: + if (ks->raw_ops.addr) { + data = ks->raw_ops.addr(ks, n); + } else { + data = ks->ks_data; + } + + if (data == NULL) { + return (-1); + } + + if (ks->raw_ops.data) { + return 
(ks->raw_ops.data(sf->buf, + sf->size, data)); + } else { + ASSERT(ks->ks_ndata == 1); + return (kstat_seq_show_raw(sf, + ks->ks_data, ks->ks_data_size)); + } + case KSTAT_TYPE_NAMED: + data = ks->ks_data + n * sizeof (kstat_named_t); + return (kstat_seq_show_named( + sf, (kstat_named_t *)data)); + case KSTAT_TYPE_INTR: + data = ks->ks_data + n * sizeof (kstat_intr_t); + return (kstat_seq_show_intr( + sf, (kstat_intr_t *)data)); + case KSTAT_TYPE_IO: + data = ks->ks_data + n * sizeof (kstat_io_t); + return (kstat_seq_show_io( + sf, (kstat_io_t *)data)); + case KSTAT_TYPE_TIMER: + data = ks->ks_data + n * sizeof (kstat_timer_t); + return (kstat_seq_show_timer( + sf, (kstat_timer_t *)data)); + default: + panic("Undefined kstat type %d\n", ks->ks_type); + } + + return (0); +} + +void +show_kstat_content(kstat_t *ks, const seq_file_generator_t *generator) +{ + if (ks->ks_lock) { + mutex_enter(ks->ks_lock); + } + if (ks->ks_update) { + ks->ks_update(ks, KSTAT_READ); + } + + struct seq_file sf; + if (!(ks->ks_flags & KSTAT_FLAG_NO_HEADERS)) { + generator->generate(generator->arg, &sf); + show_kstat_header(ks, &sf); + } + + generator->generate(generator->arg, &sf); + for (int i = 0; i < ks->ks_ndata; ++i) { + if (show_kstat_line(ks, &sf, i) < 0) { + break; + } + generator->generate(generator->arg, &sf); + } + if (ks->ks_lock) { + mutex_exit(ks->ks_lock); + } +} /* * ========================================================================= @@ -520,7 +782,15 @@ cv_broadcast(kcondvar_t *cv) void seq_printf(struct seq_file *m, const char *fmt, ...) 
-{} +{ + va_list adx; + va_start(adx, fmt); + vsnprintf(m->buf, m->size, fmt, adx); + va_end(adx); + int len = strlen(m->buf); + m->buf += len; + m->size -= len; +} void procfs_list_install(const char *module, @@ -533,17 +803,42 @@ procfs_list_install(const char *module, int (*clear)(procfs_list_t *procfs_list), size_t procfs_list_node_off) { + snprintf(procfs_list->name, KSTAT_STRLEN, + "%s/%s/%s", module, submodule, name); mutex_init(&procfs_list->pl_lock, NULL, MUTEX_DEFAULT, NULL); list_create(&procfs_list->pl_list, procfs_list_node_off + sizeof (procfs_list_node_t), procfs_list_node_off + offsetof(procfs_list_node_t, pln_link)); procfs_list->pl_next_id = 1; procfs_list->pl_node_offset = procfs_list_node_off; + procfs_list->show = show; + procfs_list->show_header = show_header; + procfs_list->clear = clear; + stat_ops.kstat_install(procfs_list->name, procfs_list, KSTAT_PROCFS); } void procfs_list_uninstall(procfs_list_t *procfs_list) -{} +{ + stat_ops.kstat_uinstall(procfs_list->name); +} + +void +show_procfs_content(procfs_list_t *procfs_list, + const seq_file_generator_t *generator) +{ + struct seq_file sf; + generator->generate(generator->arg, &sf); + procfs_list->show_header(&sf); + + mutex_enter(&procfs_list->pl_lock); + for (void *cur = list_head(&procfs_list->pl_list); cur != NULL; + cur = list_next(&procfs_list->pl_list, cur)) { + generator->generate(generator->arg, &sf); + procfs_list->show(&sf, cur); + } + mutex_exit(&procfs_list->pl_lock); +} void procfs_list_destroy(procfs_list_t *procfs_list) @@ -642,8 +937,6 @@ dprintf_setup(int *argc, char **argv) zfs_flags |= ZFS_DEBUG_DPRINTF; } -void (*print_log)(const char *, int) = NULL; - /* * ========================================================================= * debug printfs @@ -680,8 +973,8 @@ __dprintf(boolean_t new_line, const char *file, const char *func, vsnprintf(buf + len, max_len - len, fmt, adx); va_end(adx); - if (print_log) { - print_log(buf, new_line); + if (stat_ops.print_log) { + 
stat_ops.print_log(buf, new_line); } } @@ -700,7 +993,7 @@ vpanic(const char *fmt, va_list adx) (void) vfprintf(stderr, fmt, adx); (void) fprintf(stderr, "\n"); - thread_ops.backtrace(); + stat_ops.backtrace(); abort(); /* think of it as a "user-level crash dump" */ } diff --git a/module/zfs/spa_stats.c b/module/zfs/spa_stats.c index 534ac72fee7b..a945201b5ccf 100644 --- a/module/zfs/spa_stats.c +++ b/module/zfs/spa_stats.c @@ -19,6 +19,8 @@ * CDDL HEADER END */ +#include "sync_ops.h" +#include "sys/time.h" #include #include #include @@ -335,6 +337,8 @@ spa_txg_history_add(spa_t *spa, uint64_t txg, hrtime_t birth_time) mutex_exit(&shl->procfs_list.pl_lock); } +extern stat_ops_t stat_ops; + /* * Set txg state completion time and increment current state. */ @@ -354,6 +358,12 @@ spa_txg_history_set(spa_t *spa, uint64_t txg, txg_state_t completed_state, sth = list_prev(&shl->procfs_list.pl_list, sth)) { if (sth->txg == txg) { sth->times[completed_state] = completed_time; + if (completed_state > 0 && spa->metrics) { + hrtime_t delay = completed_time - + sth->times[completed_state - 1]; + stat_ops.record_txg_delays(spa->metrics, + completed_state, delay); + } sth->state++; error = 0; break; diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index 9b3d049d79b3..a0c33148eb9f 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -4513,6 +4513,8 @@ vdev_scan_stat_init(vdev_t *vd) mutex_exit(&vd->vdev_stat_lock); } +extern stat_ops_t stat_ops; + void vdev_stat_update(zio_t *zio, uint64_t psize) { diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 0cece6e230e4..35121a9b67fd 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -28,6 +28,8 @@ * Copyright (c) 2021, Datto, Inc. 
*/ +#include "sync_ops.h" +#include "sys/fs/zfs.h" #include #include #include @@ -141,6 +143,16 @@ static inline void __zio_execute(zio_t *zio); static void zio_taskq_dispatch(zio_t *, zio_taskq_type_t, boolean_t); +static inline boolean_t zio_need_record_stage(zio_t *zio) { + return ((zio->io_type == ZIO_TYPE_READ || + zio->io_type == ZIO_TYPE_WRITE) && + (zio->io_pipeline == ZIO_WRITE_PIPELINE || + zio->io_pipeline == ZIO_READ_PIPELINE) && + zio->io_vd != NULL && + zio->io_vd->metrics && + !zio->io_vd->vdev_ops->vdev_op_leaf); +} + void zio_init(void) { @@ -879,6 +891,9 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, zio->io_orig_stage = zio->io_stage = stage; zio->io_orig_pipeline = zio->io_pipeline = pipeline; zio->io_pipeline_trace = ZIO_STAGE_OPEN; + if (zio_need_record_stage(zio)) { + zio->io_stage_start[ZIO_STAGE_OPEN] = gethrtime(); + } zio->io_state[ZIO_WAIT_READY] = (stage >= ZIO_STAGE_READY); zio->io_state[ZIO_WAIT_DONE] = (stage >= ZIO_STAGE_DONE); @@ -2220,12 +2235,16 @@ __zio_execute(zio_t *zio) zio->io_stage = stage; zio->io_pipeline_trace |= zio->io_stage; + int idx = highbit64(stage) - 1; + if (zio_need_record_stage(zio)) { + zio->io_stage_start[idx] = gethrtime(); + } /* * The zio pipeline stage returns the next zio to execute * (typically the same as this one), or NULL if we should * stop. */ - zio = zio_pipeline[highbit64(stage) - 1](zio); + zio = zio_pipeline[idx](zio); if (zio == NULL) return; @@ -4517,6 +4536,8 @@ zio_dva_throttle_done(zio_t *zio) zio_allocate_dispatch(zio->io_spa, pio->io_allocator); } +extern stat_ops_t stat_ops; + static zio_t * zio_done(zio_t *zio) { @@ -4872,6 +4893,11 @@ zio_done(zio_t *zio) zio_notify_parent(pio, zio, ZIO_WAIT_DONE, &next_to_execute); } + if (zio_need_record_stage(zio)) { + stat_ops.record_zio(zio->io_vd->metrics, + zio->io_stage_start, zio->io_type == ZIO_TYPE_READ); + } + if (zio->io_waiter != NULL) { mutex_enter(&zio->io_lock); zio->io_executor = NULL;