From 6089f58f9e50e4db6817a5d42570eace04f9bc6c Mon Sep 17 00:00:00 2001 From: Lars Strojny Date: Thu, 23 Nov 2023 14:43:30 +0100 Subject: [PATCH] Expose zpool status metrics --- .gitignore | 1 + tests/__init__.py | 0 tests/fixtures/zpool_status_-p | 26 ++ tests/fixtures/zpool_status_-p_degraded | 16 + .../fixtures/zpool_status_-p_degraded_sparse | 25 ++ tests/fixtures/zpool_status_-p_logs | 21 ++ tests/fixtures/zpool_status_-p_resilvered | 33 ++ tests/fixtures/zpool_status_-p_scrub | 24 ++ tests/fixtures/zpool_status_-p_unavail | 18 + tests/zfs_zpool_test.py | 252 +++++++++++++ zfs_zpool.py | 332 +++++++++++++++--- 11 files changed, 692 insertions(+), 56 deletions(-) create mode 100644 .gitignore create mode 100644 tests/__init__.py create mode 100644 tests/fixtures/zpool_status_-p create mode 100644 tests/fixtures/zpool_status_-p_degraded create mode 100644 tests/fixtures/zpool_status_-p_degraded_sparse create mode 100644 tests/fixtures/zpool_status_-p_logs create mode 100644 tests/fixtures/zpool_status_-p_resilvered create mode 100644 tests/fixtures/zpool_status_-p_scrub create mode 100644 tests/fixtures/zpool_status_-p_unavail create mode 100644 tests/zfs_zpool_test.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..bee8a64 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +__pycache__ diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/fixtures/zpool_status_-p b/tests/fixtures/zpool_status_-p new file mode 100644 index 0000000..b0f4c77 --- /dev/null +++ b/tests/fixtures/zpool_status_-p @@ -0,0 +1,26 @@ + pool: pool0 + state: ONLINE + scan: scrub repaired 0B in 06:58:02 with 0 errors on Sun Nov 12 07:22:03 2023 +config: + + NAME STATE READ WRITE CKSUM + pool0 ONLINE 0 0 0 + raidz1-0 ONLINE 0 0 0 + ata-TOSHIBA_MG09ACA18TE_82X0A0QMFJDH ONLINE 0 0 0 + ata-TOSHIBA_MG09ACA18TE_82J0A00FFJDH ONLINE 0 0 0 + ata-TOSHIBA_MG09ACA18TE_82X0A0QPFJDH ONLINE 0 0 0 + +errors: No known data errors + 
+ pool: pool1 + state: ONLINE + scan: scrub repaired 1M in 00:04:45 with 0 errors on Sun Nov 12 00:28:47 2023 +config: + + NAME STATE READ WRITE CKSUM + pool1 ONLINE 0 0 0 + mirror-0 ONLINE 0 0 0 + nvme-Samsung_SSD_980_500GB_S64DNL0T824602E-part1 ONLINE 0 0 0 + nvme-Samsung_SSD_980_500GB_S64DNL0T824555Z-part1 ONLINE 0 0 0 + +errors: No known data errors \ No newline at end of file diff --git a/tests/fixtures/zpool_status_-p_degraded b/tests/fixtures/zpool_status_-p_degraded new file mode 100644 index 0000000..29faac9 --- /dev/null +++ b/tests/fixtures/zpool_status_-p_degraded @@ -0,0 +1,16 @@ + pool: tank + state: DEGRADED +status: One or more devices could not be opened. Sufficient replicas exist for + the pool to continue functioning in a degraded state. +action: Attach the missing device and online it using 'zpool online'. + see: http://www.sun.com/msg/ZFS-8000-2Q + scrub: none requested +config: + + NAME STATE READ WRITE CKSUM + tank DEGRADED 0 0 0 + mirror-0 DEGRADED 0 0 0 + c1t0d0 ONLINE 0 0 0 + c1t1d0 UNAVAIL 0 0 0 cannot open + +errors: No known data errors diff --git a/tests/fixtures/zpool_status_-p_degraded_sparse b/tests/fixtures/zpool_status_-p_degraded_sparse new file mode 100644 index 0000000..a2db4e4 --- /dev/null +++ b/tests/fixtures/zpool_status_-p_degraded_sparse @@ -0,0 +1,25 @@ + pool: test + state: DEGRADED +status: One or more devices could not be used because the label is missing or + invalid. Sufficient replicas exist for the pool to continue + functioning in a degraded state. +action: Replace the device using 'zpool replace'. 
+ see: http://zfsonlinux.org/msg/ZFS-8000-4J + scan: resilvered 25.8M in 0h0m with 0 errors on Fri Aug 22 12:02:46 2014 +config: + + NAME STATE READ WRITE CKSUM + test DEGRADED 0 0 0 + mirror-0 DEGRADED 0 0 0 + spare-0 DEGRADED 1 0 0 + ata-VBOX_HARDDISK_VBb80f1f56-538e9acf ONLINE 0 0 1 + 16876260487220383698 FAULTED 0 0 0 was /dev/disk/by-path/pci-0000:00:0d.0-scsi-12:0:0:0-part1 + ata-VBOX_HARDDISK_VB875e28a5-4b293298 ONLINE 0 0 0 + mirror-1 ONLINE 0 0 0 + ata-VBOX_HARDDISK_VB4145ff65-9b1320a3 ONLINE 0 0 0 + ata-VBOX_HARDDISK_VBee9d66a1-edf52bff ONLINE 0 0 0 + spares + pci-0000:00:0d.0-scsi-10:0:0:0 AVAIL + pci-0000:00:0d.0-scsi-11:0:0:0 AVAIL + pci-0000:00:0d.0-scsi-12:0:0:0 AVAIL + pci-0000:00:0d.0-scsi-13:0:0:0 AVAIL diff --git a/tests/fixtures/zpool_status_-p_logs b/tests/fixtures/zpool_status_-p_logs new file mode 100644 index 0000000..eeb8cfd --- /dev/null +++ b/tests/fixtures/zpool_status_-p_logs @@ -0,0 +1,21 @@ + pool: zones + state: DEGRADED +status: One or more devices could not be opened. Sufficient replicas exist for + the pool to continue functioning in a degraded state. +action: Attach the missing device and online it using 'zpool online'. + see: http://illumos.org/msg/ZFS-8000-2Q + scan: scrub repaired 0 in 29h37m with 0 errors on Thu Jul 14 18:42:06 2016 +config: + + NAME STATE READ WRITE CKSUM + zones DEGRADED 0 0 0 + mirror-0 DEGRADED 0 0 0 + 9669136929555758172 UNAVAIL 0 0 0 was /dev/dsk/c0t5000C5006349E003d0s0 + c0t5000C500631F81E7d0 ONLINE 0 0 0 + mirror-1 ONLINE 0 0 0 + c0t5000C500634A297Bd0 ONLINE 0 0 0 + c0t5000C500634B4EA3d0 ONLINE 0 0 0 + logs + c0t55CD2E404B73663Dd0 ONLINE 0 0 0 + +errors: No known data errors diff --git a/tests/fixtures/zpool_status_-p_resilvered b/tests/fixtures/zpool_status_-p_resilvered new file mode 100644 index 0000000..431019e --- /dev/null +++ b/tests/fixtures/zpool_status_-p_resilvered @@ -0,0 +1,33 @@ + pool: rpool + state: DEGRADED +status: One or more devices could not be opened. 
Sufficient replicas exist for + the pool to continue functioning in a degraded state. +action: Attach the missing device and online it using 'zpool online'. + see: http://www.sun.com/msg/ZFS-8000-2Q + scan: resilvered 1.41M in 0h0m with 0 errors on Tue Nov 15 05:31:36 2011 +config: + + NAME STATE READ WRITE CKSUM + rpool DEGRADED 0 0 0 + mirror-0 DEGRADED 0 0 0 + c4d1s0 UNAVAIL 0 0 0 cannot open + c2t1d0s0 ONLINE 0 0 0 + c3d1s0 UNAVAIL 0 0 0 cannot open + +errors: No known data errors + + pool: zpool + state: UNAVAIL +status: One or more devices could not be opened. There are insufficient + replicas for the pool to continue functioning. +action: Attach the missing device and online it using 'zpool online'. + see: http://www.sun.com/msg/ZFS-8000-3C + scan: none requested +config: + + NAME STATE READ WRITE CKSUM + zpool UNAVAIL 0 0 0 insufficient replicas + raidz1-0 UNAVAIL 0 0 0 insufficient replicas + c2t1d0p2 ONLINE 0 0 0 + c4d1p2 UNAVAIL 0 0 0 cannot open + c3d1p2 UNAVAIL 0 0 0 cannot open diff --git a/tests/fixtures/zpool_status_-p_scrub b/tests/fixtures/zpool_status_-p_scrub new file mode 100644 index 0000000..1a19472 --- /dev/null +++ b/tests/fixtures/zpool_status_-p_scrub @@ -0,0 +1,24 @@ + pool: freenas-boot + state: ONLINE + scan: scrub repaired 0 in 0h2m with 0 errors on Wed Jan 25 03:47:27 2017 +config: + + NAME STATE READ WRITE CKSUM + freenas-boot ONLINE 0 0 0 + da0p2 ONLINE 0 0 0 + +errors: No known data errors + + pool: nas_zfs_vol0 + state: ONLINE + scan: scrub repaired 0 in 7h7m with 0 errors on Sun Jan 8 07:07:22 2017 +config: + + NAME STATE READ WRITE CKSUM + nas_zfs_vol0 ONLINE 0 0 0 + mirror-0 ONLINE 0 0 0 + gptid/a855d0c8-5218-11e3-9e38-10604b926998 ONLINE 0 0 0 + gptid/a8c3fe2f-5218-11e3-9e38-10604b926998 ONLINE 0 0 0 + mirror-1 ONLINE 0 0 0 + gptid/a91ebd06-5218-11e3-9e38-10604b926998 ONLINE 0 0 0 + gptid/a96f4d37-5218-11e3-9e38-10604b926998 ONLINE 0 0 0 diff --git a/tests/fixtures/zpool_status_-p_unavail 
b/tests/fixtures/zpool_status_-p_unavail new file mode 100644 index 0000000..e963a29 --- /dev/null +++ b/tests/fixtures/zpool_status_-p_unavail @@ -0,0 +1,18 @@ + pool: tank + state: UNAVAIL +status: One or more devices are faulted in response to IO failures. +action: Make sure the affected devices are connected, then run 'zpool clear'. + see: http://www.sun.com/msg/ZFS-8000-HC + scrub: scrub completed after 0h0m with 0 errors on Tue Feb 2 13:08:42 2010 +config: + + NAME STATE READ WRITE CKSUM + tank UNAVAIL 0 0 0 insufficient replicas + c1t0d0 ONLINE 0 0 0 + c1t1d0 UNAVAIL 4 1 0 cannot open + +errors: Permanent errors have been detected in the following files: + +/tank/data/aaa +/tank/data/bbb +/tank/data/ccc diff --git a/tests/zfs_zpool_test.py b/tests/zfs_zpool_test.py new file mode 100644 index 0000000..6f8723c --- /dev/null +++ b/tests/zfs_zpool_test.py @@ -0,0 +1,252 @@ +from datetime import datetime, timedelta +from zfs_zpool import zpool_status_parse, ZpoolStatus, ZpoolConfig, ZpoolScan +from pathlib import Path + + +def test_zpool_status_parse_complex(): + assert zpool_status_parse(Path(__file__).parent.joinpath('fixtures/zpool_status_-p').read_text()) == [ + ZpoolStatus( + name='pool0', + state='ONLINE', + configs=[ + ZpoolConfig(name='pool0', path=['pool0'], state='ONLINE', read=0, write=0, checksum=0), + ZpoolConfig(name='raidz1-0', path=['pool0', 'raidz1-0'], state='ONLINE', read=0, write=0, checksum=0), + ZpoolConfig(name='ata-TOSHIBA_MG09ACA18TE_82X0A0QMFJDH', + path=['pool0', 'raidz1-0', 'ata-TOSHIBA_MG09ACA18TE_82X0A0QMFJDH'], state='ONLINE', read=0, + write=0, checksum=0), + ZpoolConfig(name='ata-TOSHIBA_MG09ACA18TE_82J0A00FFJDH', + path=['pool0', 'raidz1-0', 'ata-TOSHIBA_MG09ACA18TE_82J0A00FFJDH'], state='ONLINE', read=0, + write=0, checksum=0), + ZpoolConfig(name='ata-TOSHIBA_MG09ACA18TE_82X0A0QPFJDH', + path=['pool0', 'raidz1-0', 'ata-TOSHIBA_MG09ACA18TE_82X0A0QPFJDH'], state='ONLINE', read=0, + write=0, checksum=0), + ], + 
scrub=ZpoolScan(at=datetime(2023, 11, 12, 7, 22, 3), + duration=timedelta(seconds=25082), + corrected=0), + ), + ZpoolStatus( + name='pool1', + state='ONLINE', + configs=[ + ZpoolConfig(name='pool1', path=['pool1'], state='ONLINE', read=0, write=0, checksum=0), + ZpoolConfig(name='mirror-0', path=['pool1', 'mirror-0'], state='ONLINE', read=0, write=0, checksum=0), + ZpoolConfig(name='nvme-Samsung_SSD_980_500GB_S64DNL0T824602E-part1', path=[ + 'pool1', 'mirror-0', 'nvme-Samsung_SSD_980_500GB_S64DNL0T824602E-part1'], state='ONLINE', read=0, + write=0, checksum=0), + ZpoolConfig(name='nvme-Samsung_SSD_980_500GB_S64DNL0T824555Z-part1', + path=['pool1', 'mirror-0', 'nvme-Samsung_SSD_980_500GB_S64DNL0T824555Z-part1'], + state='ONLINE', read=0, write=0, checksum=0), + ], + scrub=ZpoolScan(at=datetime(2023, 11, 12, 0, 28, 47), + duration=timedelta(seconds=285), + corrected=1048576), + ), + ] + + +def test_zpool_status_parse_unavail(): + assert zpool_status_parse(Path(__file__).parent.joinpath('fixtures/zpool_status_-p_unavail').read_text()) == [ + ZpoolStatus( + name='tank', + state='UNAVAIL', + configs=[ + ZpoolConfig(name='tank', path=['tank'], state='UNAVAIL', read=0, write=0, checksum=0, + comment='insufficient replicas'), + ZpoolConfig(name='c1t0d0', path=['tank', 'c1t0d0'], state='ONLINE', read=0, write=0, checksum=0), + ZpoolConfig(name='c1t1d0', path=['tank', 'c1t1d0'], state='UNAVAIL', read=4, write=1, checksum=0, + comment='cannot open') + ] + ) + ] + + +def test_zpool_status_parse_degraded_simple(): + assert zpool_status_parse(Path(__file__).parent.joinpath('fixtures/zpool_status_-p_degraded').read_text()) == [ + ZpoolStatus( + name='tank', + state='DEGRADED', + configs=[ + ZpoolConfig(name='tank', path=['tank'], state='DEGRADED', read=0, write=0, checksum=0), + ZpoolConfig(name='mirror-0', path=['tank', 'mirror-0'], state='DEGRADED', read=0, write=0, checksum=0), + ZpoolConfig(name='c1t0d0', path=['tank', 'mirror-0', 'c1t0d0'], state='ONLINE', read=0, write=0, 
+ checksum=0), + ZpoolConfig(name='c1t1d0', path=['tank', 'mirror-0', 'c1t1d0'], state='UNAVAIL', read=0, write=0, + checksum=0, comment='cannot open'), + ] + ) + ] + + +def test_zpool_status_parse_degraded_with_spare(): + assert zpool_status_parse( + Path(__file__).parent.joinpath('fixtures/zpool_status_-p_degraded_sparse').read_text()) == [ + ZpoolStatus(name='test', configs=[ + ZpoolConfig(name='test', path=['test'], state='DEGRADED', read=0, write=0, checksum=0), + ZpoolConfig(name='mirror-0', path=['test', 'mirror-0'], state='DEGRADED', read=0, write=0, + checksum=0), + ZpoolConfig(name='spare-0', path=['test', 'mirror-0', 'spare-0'], state='DEGRADED', read=1, write=0, + checksum=0), + ZpoolConfig(name='ata-VBOX_HARDDISK_VBb80f1f56-538e9acf', + path=['test', 'mirror-0', 'spare-0', 'ata-VBOX_HARDDISK_VBb80f1f56-538e9acf'], + state='ONLINE', + read=0, write=0, checksum=1), + ZpoolConfig(name='pci-0000:00:0d.0-scsi-12:0:0:0-part1', + path=['test', 'mirror-0', 'spare-0', 'pci-0000:00:0d.0-scsi-12:0:0:0-part1'], + state='FAULTED', + read=0, write=0, checksum=0, + comment='was /dev/disk/by-path/pci-0000:00:0d.0-scsi-12:0:0:0-part1'), + ZpoolConfig(name='ata-VBOX_HARDDISK_VB875e28a5-4b293298', + path=['test', 'mirror-0', 'ata-VBOX_HARDDISK_VB875e28a5-4b293298'], state='ONLINE', + read=0, + write=0, checksum=0), + ZpoolConfig(name='mirror-1', path=['test', 'mirror-1'], state='ONLINE', read=0, write=0, checksum=0), + ZpoolConfig(name='ata-VBOX_HARDDISK_VB4145ff65-9b1320a3', + path=['test', 'mirror-1', 'ata-VBOX_HARDDISK_VB4145ff65-9b1320a3'], state='ONLINE', + read=0, + write=0, checksum=0), + ZpoolConfig(name='ata-VBOX_HARDDISK_VBee9d66a1-edf52bff', + path=['test', 'mirror-1', 'ata-VBOX_HARDDISK_VBee9d66a1-edf52bff'], state='ONLINE', + read=0, + write=0, checksum=0), + ZpoolConfig(name='pci-0000:00:0d.0-scsi-10:0:0:0', path=['spares', 'pci-0000:00:0d.0-scsi-10:0:0:0'], + state='AVAIL', is_spare=True), + ZpoolConfig(name='pci-0000:00:0d.0-scsi-11:0:0:0', 
path=['spares', 'pci-0000:00:0d.0-scsi-11:0:0:0'], + state='AVAIL', is_spare=True), + ZpoolConfig(name='pci-0000:00:0d.0-scsi-12:0:0:0', path=['spares', 'pci-0000:00:0d.0-scsi-12:0:0:0'], + state='AVAIL', is_spare=True), + ZpoolConfig(name='pci-0000:00:0d.0-scsi-13:0:0:0', path=['spares', 'pci-0000:00:0d.0-scsi-13:0:0:0'], + state='AVAIL', is_spare=True), + ], state='DEGRADED', + resilvering=ZpoolScan(at=datetime(2014, 8, 22, 12, 2, 46), + duration=timedelta(0), + corrected=27053261) + ) + ] + + +def test_zpool_status_parse_resilvered(): + assert zpool_status_parse( + Path(__file__).parent.joinpath('fixtures/zpool_status_-p_resilvered').read_text()) == [ + ZpoolStatus(name='rpool', + state='DEGRADED', + configs=[ZpoolConfig(name='rpool', path=['rpool'], state='DEGRADED', read=0, write=0, + checksum=0, ), + ZpoolConfig(name='mirror-0', path=['rpool', 'mirror-0'], state='DEGRADED', read=0, + write=0, checksum=0, ), + ZpoolConfig(name='c4d1s0', path=['rpool', 'mirror-0', 'c4d1s0'], state='UNAVAIL', + read=0, write=0, checksum=0, comment='cannot open'), + ZpoolConfig(name='c2t1d0s0', path=['rpool', 'mirror-0', 'c2t1d0s0'], state='ONLINE', + read=0, write=0, checksum=0), + ZpoolConfig(name='c3d1s0', path=['rpool', 'mirror-0', 'c3d1s0'], state='UNAVAIL', + read=0, write=0, checksum=0, comment='cannot open')], + resilvering=ZpoolScan(at=datetime(2011, 11, 15, 5, 31, 36), + duration=timedelta(seconds=0), + corrected=1478492), + ), + ZpoolStatus(name='zpool', + state='UNAVAIL', + configs=[ + ZpoolConfig(name='zpool', path=['zpool'], state='UNAVAIL', read=0, write=0, checksum=0, + comment='insufficient replicas'), + ZpoolConfig(name='raidz1-0', path=['zpool', 'raidz1-0'], state='UNAVAIL', read=0, + write=0, checksum=0, comment='insufficient replicas'), + ZpoolConfig(name='c2t1d0p2', path=['zpool', 'raidz1-0', 'c2t1d0p2'], state='ONLINE', + read=0, write=0, checksum=0), + ZpoolConfig(name='c4d1p2', path=['zpool', 'raidz1-0', 'c4d1p2'], state='UNAVAIL', read=0, + write=0, 
checksum=0, comment='cannot open'), + ZpoolConfig(name='c3d1p2', path=['zpool', 'raidz1-0', 'c3d1p2'], state='UNAVAIL', read=0, + write=0, checksum=0, comment='cannot open')], + ) + ] + + +def test_zpool_status_scrub(): + assert zpool_status_parse( + Path(__file__).parent.joinpath('fixtures/zpool_status_-p_scrub').read_text()) == [ + ZpoolStatus(name='freenas-boot', + state='ONLINE', + configs=[ + ZpoolConfig(name='freenas-boot', path=['freenas-boot'], state='ONLINE', read=0, write=0, + checksum=0), + ZpoolConfig(name='da0p2', path=['freenas-boot', 'da0p2'], state='ONLINE', read=0, + write=0, checksum=0)], + scrub=ZpoolScan(at=datetime(2017, 1, 25, 3, 47, 27), + duration=timedelta(seconds=120), + corrected=0) + ), + ZpoolStatus(name='nas_zfs_vol0', + state='ONLINE', + configs=[ + ZpoolConfig(name='nas_zfs_vol0', path=['nas_zfs_vol0'], state='ONLINE', read=0, write=0, + checksum=0), + ZpoolConfig(name='mirror-0', path=['nas_zfs_vol0', 'mirror-0'], state='ONLINE', read=0, + write=0, + checksum=0), + ZpoolConfig(name='gptid/a855d0c8-5218-11e3-9e38-10604b926998', + path=['nas_zfs_vol0', 'mirror-0', + 'gptid/a855d0c8-5218-11e3-9e38-10604b926998'], + state='ONLINE', read=0, write=0, checksum=0), + ZpoolConfig(name='gptid/a8c3fe2f-5218-11e3-9e38-10604b926998', + path=['nas_zfs_vol0', 'mirror-0', + 'gptid/a8c3fe2f-5218-11e3-9e38-10604b926998'], + state='ONLINE', read=0, write=0, checksum=0), + ZpoolConfig(name='mirror-1', path=['nas_zfs_vol0', 'mirror-1'], state='ONLINE', read=0, + write=0, + checksum=0), + ZpoolConfig(name='gptid/a91ebd06-5218-11e3-9e38-10604b926998', + path=['nas_zfs_vol0', 'mirror-1', + 'gptid/a91ebd06-5218-11e3-9e38-10604b926998'], + state='ONLINE', read=0, write=0, checksum=0), + ZpoolConfig(name='gptid/a96f4d37-5218-11e3-9e38-10604b926998', + path=['nas_zfs_vol0', 'mirror-1', + 'gptid/a96f4d37-5218-11e3-9e38-10604b926998'], + state='ONLINE', read=0, write=0, checksum=0), + ], + scrub=ZpoolScan(at=datetime(2017, 1, 8, 7, 7, 22), + 
duration=timedelta(seconds=25620), + corrected=0) + ) + ] + + +def test_zpool_status_logs(): + assert zpool_status_parse( + Path(__file__).parent.joinpath('fixtures/zpool_status_-p_logs').read_text()) == [ + ZpoolStatus(name='zones', + state='DEGRADED', + configs=[ + ZpoolConfig(name='zones', path=['zones'], state='DEGRADED', read=0, write=0, checksum=0), + ZpoolConfig(name='mirror-0', path=['zones', 'mirror-0'], state='DEGRADED', read=0, + write=0, checksum=0), + ZpoolConfig(name='c0t5000C5006349E003d0s0', + path=['zones', 'mirror-0', 'c0t5000C5006349E003d0s0'], state='UNAVAIL', + read=0, write=0, checksum=0, comment='was /dev/dsk/c0t5000C5006349E003d0s0'), + ZpoolConfig(name='c0t5000C500631F81E7d0', + path=['zones', 'mirror-0', 'c0t5000C500631F81E7d0'], state='ONLINE', read=0, + write=0, checksum=0), + ZpoolConfig(name='mirror-1', path=['zones', 'mirror-1'], state='ONLINE', read=0, write=0, + checksum=0), + ZpoolConfig(name='c0t5000C500634A297Bd0', + path=['zones', 'mirror-1', 'c0t5000C500634A297Bd0'], state='ONLINE', read=0, + write=0, checksum=0), + ZpoolConfig(name='c0t5000C500634B4EA3d0', + path=['zones', 'mirror-1', 'c0t5000C500634B4EA3d0'], state='ONLINE', read=0, + write=0, checksum=0), ZpoolConfig(name='logs', path=['logs'], state=''), + ZpoolConfig(name='c0t55CD2E404B73663Dd0', path=['logs', 'c0t55CD2E404B73663Dd0'], + state='ONLINE', read=0, write=0, checksum=0) + ], + scrub=ZpoolScan(at=datetime(2016, 7, 14, 18, 42, 6), + duration=timedelta(seconds=106620), + corrected=0), + ) + ] + + +def test_zpool_status_parse_empty(): + assert zpool_status_parse('') == [] + assert zpool_status_parse('\n\n') == [] + + +def test_zpool_status_parse_garbage(): + assert zpool_status_parse('pool: foo\npool: bar') == [] diff --git a/zfs_zpool.py b/zfs_zpool.py index baccc3d..10a4b94 100644 --- a/zfs_zpool.py +++ b/zfs_zpool.py @@ -1,109 +1,329 @@ #!/usr/bin/env python3 import os +import re import subprocess from concurrent.futures import ThreadPoolExecutor -from typing 
import Tuple - +from dataclasses import dataclass, replace +from datetime import datetime, timedelta +from functools import reduce from prometheus_client import CollectorRegistry, Gauge, generate_latest -ZPOOL_METADATA_LABELS = ("health", "version", "readonly", "ashift", "autoreplace", "failmode") +ZPOOL_METADATA_LABELS = ('health', 'version', 'readonly', 'ashift', 'autoreplace', 'failmode') -def zpool_metadata(registry): - metric = Gauge("zpool", "Constant metric with metadata about the zpool", - labelnames=['zpool_name', *ZPOOL_METADATA_LABELS], namespace='zfs', registry=registry, ) - cmd = ('zpool', 'list', '-H', '-o', 'name,' + ",".join(ZPOOL_METADATA_LABELS)) - for constant_labels in run(cmd): - metric.labels(*constant_labels).set(1) +def zpool_metadata(registry: CollectorRegistry): + metric = Gauge('zpool', 'Constant metric with metadata about the zpool', + labelnames=['zpool_name', *ZPOOL_METADATA_LABELS], namespace='zfs', registry=registry) + cmd = ('zpool', 'list', '-H', '-o', 'name,' + ','.join(ZPOOL_METADATA_LABELS)) + for constant_labels in run_tabular(cmd): + metric.labels(*constant_labels) -def run(cmd): +def run(cmd: tuple[str, ...]): popen = subprocess.Popen( - cmd, stdout=subprocess.PIPE, env=dict(os.environ, LC_ALL="C") + cmd, stdout=subprocess.PIPE, env=dict(os.environ, LC_ALL='C') ) - for stdout_line in iter(popen.stdout.readline, ""): - if stdout_line == b"": + + if popen.stdout is None: + return + + for stdout_line in iter(popen.stdout.readline, ''): + if stdout_line == b'': break - yield stdout_line.strip().decode("utf-8").split("\t") + yield stdout_line.decode('utf-8') return_code = popen.wait() if return_code > 0: raise subprocess.CalledProcessError(return_code, cmd) +def run_tabular(cmd): + for line in run(cmd): + yield line.strip().split('\t') + + ZPOOL_INFO_METRICS = ( - ("size", "Total size of the storage pool", "bytes"), - ("free", "The amount of free space available in the pool", "bytes"), - ("freeing", "The amount of space waiting to 
be reclaimed from destroyed filesystems or snapshots", "bytes"), - ('dedupratio', "The deduplication ratio", ""), - ("fragmentation", "The amount of fragmentation in the pool", "") + ('size', 'Total size of the storage pool', 'bytes'), + ('free', 'The amount of free space available in the pool', 'bytes'), + ('freeing', 'The amount of space waiting to be reclaimed from destroyed filesystems or snapshots', 'bytes'), + ('dedupratio', 'The deduplication ratio', ''), + ('fragmentation', 'The amount of fragmentation in the pool', '') ) -def zpool_info(registry): - cmd = ('zpool', 'list', '-Hp', '-o', "name," + ','.join([col for (col, *_) in ZPOOL_INFO_METRICS])) +def zpool_info(registry: CollectorRegistry): + cmd = ('zpool', 'list', '-Hp', '-o', 'name,' + ','.join([column_name for (column_name, *_) in ZPOOL_INFO_METRICS])) metrics = {} - for line in run(cmd): - for (idx, (col, doc, unit)) in enumerate(ZPOOL_INFO_METRICS, 1): - if col not in metrics: - metrics[col] = Gauge(col, documentation=doc, unit=unit, namespace='zfs_zpool', registry=registry, - labelnames=["zpool_name"]) - metrics[col].labels((line[0])).set(float(line[idx])) + for columns in run_tabular(cmd): + for (idx, (column_name, doc, unit)) in enumerate(ZPOOL_INFO_METRICS, 1): + if column_name not in metrics: + metrics[column_name] = Gauge(f'zpool_{column_name}', documentation=doc, unit=unit, namespace='zfs', + registry=registry, + labelnames=['pool_name']) + metrics[column_name].labels((columns[0])).set(float(columns[idx])) -DATASET_METADATA_LABELS = ("type", "creation", "mounted", "mountpoint", "checksum", "compression", "readonly", - "version", "dedup", "volblocksize") +DATASET_METADATA_LABELS = ['type', 'creation', 'mounted', 'mounted', 'checksum', 'compression', 'readonly', + 'version', 'dedup', 'volblocksize'] -DATASET_TYPES = ("filesystem", "volume") +DATASET_TYPES = ('filesystem', 'volume') -def dataset_metadata(registry): - cmd = ("zfs", "list", "-Hp", "-t", ",".join(DATASET_TYPES), "-o", "name," + 
",".join(DATASET_METADATA_LABELS)) - metric = Gauge("dataset", documentation="Constant metric with metadata about the zfs dataset", namespace="zfs", - registry=registry, labelnames=["dataset_name", *DATASET_METADATA_LABELS]) - for line in run(cmd): - metric.labels(*line).set(1) +def dataset_metadata(registry: CollectorRegistry): + cmd = ('zfs', 'list', '-Hp', '-t', ','.join(DATASET_TYPES), '-o', 'name,' + ','.join(DATASET_METADATA_LABELS)) + metric = Gauge('dataset', documentation='Constant metric with metadata about the zfs dataset', namespace='zfs', + registry=registry, labelnames=['dataset_name', *DATASET_METADATA_LABELS]) + for columns in run_tabular(cmd): + metric.labels(*columns).set(1) DATASET_INFO_METRICS = ( - ("used", "The amount of space consumed by this dataset and all its descendents", "bytes"), - ("available", "The amount of space available to the dataset and all its children", "bytes"), - ("referenced", - "The amount of data that is accessible by this dataset, which may or may not be shared with other datasets in the pool", - "bytes"), - ("compressratio", - "For non-snapshots, the compression ratio achieved for the used space of this dataset, expressed as a multiplier", - ""), - ("reservation", "The minimum amount of space guaranteed to a dataset and its descendants", "bytes"), - ("refreservation", "The minimum amount of space guaranteed to a dataset, not including its descendents", "bytes"), - ("volsize", "For volumes, specifies the logical size of the volume", "bytes") + ('used', 'The amount of space consumed by this dataset and all its descendents', 'bytes'), + ('available', 'The amount of space available to the dataset and all its children', 'bytes'), + ('referenced', + 'The amount of data that is accessible by this dataset, which may or may not be shared with other datasets in the pool', + 'bytes'), + ('compressratio', + 'For non-snapshots, the compression ratio achieved for the used space of this dataset, expressed as a multiplier', + ''), + 
('reservation', 'The minimum amount of space guaranteed to a dataset and its descendants', 'bytes'), + ('refreservation', 'The minimum amount of space guaranteed to a dataset, not including its descendents', 'bytes'), + ('volsize', 'For volumes, specifies the logical size of the volume', 'bytes') ) -def dataset_metrics(registry): - cmd = ("zfs", "list", "-Hp", "-t", ",".join(DATASET_TYPES), "-o", "name," + ",".join([col for (col, *_) in DATASET_INFO_METRICS])) +def dataset_metrics(registry: CollectorRegistry): + cmd = ('zfs', 'list', '-Hp', '-t', ','.join(DATASET_TYPES), '-o', + 'name,' + ','.join([col for (col, *_) in DATASET_INFO_METRICS])) metrics = {} - for line in run(cmd): + for columns in run_tabular(cmd): for (idx, (col, doc, unit)) in enumerate(DATASET_INFO_METRICS, 1): if col not in metrics: - metrics[col] = Gauge(col, documentation=doc, unit=unit, registry=registry, labelnames=["dataset_name"], - namespace="zfs_dataset") + metrics[col] = Gauge(f'dataset_{col}', documentation=doc, unit=unit, registry=registry, + labelnames=['dataset_name'], + namespace='zfs') - if line[idx] == "-": + if columns[idx] == '-': continue - metrics[col].labels((line[0])).set(float(line[idx].rstrip("x"))) + metrics[col].labels((columns[0])).set(float(columns[idx].rstrip('x'))) + + +@dataclass +class ZpoolConfig: + name: str + path: list[str] + state: str + read: int | None = None + write: int | None = None + checksum: int | None = None + comment: str | None = None + is_spare: bool = False + indent: int = 0 + leading_whitespace: str = '' + + +@dataclass +class ZpoolScan: + at: datetime + duration: timedelta + corrected: int + + +@dataclass +class ZpoolStatus: + name: str + state: str + configs: list[ZpoolConfig] + scrub: ZpoolScan | None = None + resilvering: ZpoolScan | None = None + +def zpool_status(registry: CollectorRegistry): + cmd = ('zpool', 'status', '-p') + metrics = {} + + for status in zpool_status_parse('\n'.join(list(run(cmd)))): + if 'status' not in metrics: + 
metrics['status'] = Gauge('zpool_status', 'The status of the zpool', labelnames=['zpool_name', 'state'], + namespace='zfs', registry=registry) + metrics['status'].labels(status.name, status.state).set(1) + + if status.scrub: + scan_metrics('scrub', metrics, registry, status.scrub) + + if status.resilvering: + scan_metrics('resilvering', metrics, registry, status.resilvering) + + for config in status.configs: + if 'vdev_info' not in metrics: + metrics['vdev_info'] = Gauge('zpool_vdev_info', 'Information about the vdevs in a zpool', + labelnames=['zpool_name', 'vdev_name', 'path', 'state', 'read', 'write', + 'checksum'], + namespace='zfs', registry=registry) + metrics['vdev_info'].labels(status.name, config.name, f'{config.path[0]}://{"/".join(config.path[1:])}', + config.state, + none_to_empty_string(config.read), none_to_empty_string(config.write), + none_to_empty_string(config.checksum)).set(1) + + +def scan_metrics(activity: str, metrics: dict[str, Gauge], registry: CollectorRegistry, status: ZpoolStatus, scan: ZpoolScan): + if f'{activity}_duration' not in metrics: + metrics[f'{activity}_duration'] = Gauge(f'zpool_{activity}_duration', + f'The duration of the latest zpool {activity} in seconds', + labelnames=['zpool_name'], namespace='zfs', unit='seconds', + registry=registry) + metrics[f'{activity}_duration'].labels(status.name).set(scan.duration.total_seconds()) + if f'{activity}_corrected' not in metrics: + metrics[f'{activity}_corrected'] = Gauge(f'zpool_{activity}_corrected', + f'The number of corrected bytes of the latest zpool {activity}', + labelnames=['zpool_name'], namespace='zfs', + unit='bytes', + registry=registry) + metrics[f'{activity}_corrected'].labels(status.name).set(scan.corrected) + if f'{activity}_time' not in metrics: + metrics[f'{activity}_time'] = Gauge(f'zpool_{activity}_time', + f'The timestamp of the latest zpool {activity}', + labelnames=['zpool_name'], namespace='zfs', unit='seconds', + registry=registry) + 
metrics[f'{activity}_time'].labels(status.name).set(scan.at.timestamp()) + + +def none_to_empty_string(value): + return '' if value is None else value + + +def zpool_status_parse(content: str) -> list[ZpoolStatus]: + statuses: list[ZpoolStatus] = [] + + for status in re.findall(r'^\s*pool:\s+(?:.+(?=^\s*pool:\s+)|.+\Z)', content, re.MULTILINE | re.DOTALL): + matched_pairs: list[tuple[str, str]] = re.findall(r'^\s*(\w+):\s*(.+?(?=^\s*\w+:)|.*\Z)', status, + re.MULTILINE | re.DOTALL) + matches = dict([(key, value.strip()) for key, value in matched_pairs]) + + configs = re.findall( + r'^([\t ]*)(\S+)(?:[\t ]+(\S+)(?:[\t ]+(\S+)[\t ]+(\S+)[\t ]+(\S+)(?:[\t ]+([^\n]+))?)?)?$', + matches.get('config', ''), re.MULTILINE | re.DOTALL) + + if len(configs) == 0: + continue + + configs = [ZpoolConfig( + name=config[1].strip(), + path=[], + state=config[2].strip(), + read=int(config[3]) if config[3] != '' else None, + write=int(config[4]) if config[4] != '' else None, + checksum=int(config[5]) if config[5] != '' else None, + comment=config[6] if config[6] != '' else None, + leading_whitespace=config[0], + ) for config in configs[1:]] + + configs = reduce( + lambda acc, config: acc + [ + replace(config, is_spare=config.name == 'spares' or (acc[-1].is_spare if len(acc) > 0 else False))], + configs, []) + + # Size the indentation of each line and strip, remove headlines + configs = [replace(config, indent=int(len(config.leading_whitespace) / 2)) for config in configs] + + # Normalize names + configs = [replace(config, name=config.comment[4:].split('/')[-1] if str(config.comment).startswith( + 'was ') else config.name) for config in configs] + + offset = configs[0].indent + + # Accumulate path hierarchy based on indent size + configs = reduce( + lambda acc, config: acc + [replace( + config, + path=[*(acc[-1].path[0:config.indent - offset] if len(acc) > 0 else []), config.name])], configs, + []) + + configs = [replace(config, indent=0, leading_whitespace='') for config in configs if 
config.name != 'spares'] + + scrub = None + resilvering = None + scan = re.match( + r'(?P<activity>scrub repaired|resilvered) (?P<corrected>\S+) in (?P<duration>\S+) with (\d+) errors on (?P<at>.+)$', + matches.get('scan', '')) + if scan: + scan_info = ZpoolScan( + at=datetime.strptime(scan.group('at'), '%a %b %d %H:%M:%S %Y'), + duration=parse_time_duration(scan.group('duration')), + corrected=parse_si_unit(scan.group('corrected')) + ) + if scan.group('activity') == 'scrub repaired': + scrub = scan_info + elif scan.group('activity') == 'resilvered': + resilvering = scan_info + + statuses.append(ZpoolStatus( + name=matches.get('pool', ''), + state=matches.get('state', ''), + configs=list(configs), + scrub=scrub, + resilvering=resilvering + )) + + return statuses + + +SI_UNITS = { + 'B': 0, + 'K': 1, + 'M': 2, + 'G': 3, + 'T': 4, + 'P': 5, + 'E': 6, + 'Z': 7, + 'Y': 8 +} + + +def parse_si_unit(value: str): + if value.isdecimal(): + return round(float(value)) + return round(float(value[:-1]) * (1024 ** SI_UNITS[value[-1].upper()])) + + +TIME_UNITS = { + 's': 'seconds', + 'm': 'minutes', + 'h': 'hours', + 'd': 'days', + 'w': 'weeks', + 'y': 'years' +} + + +def parse_time_duration(value: str): + delta = timedelta(seconds=0) + if ':' in value: + for p, n in enumerate(value.split(':')[::-1]): + unit = list(TIME_UNITS.values())[p] + delta += timedelta(**{unit: int(n)}) + return delta + + num = 0 + for c in value: + if c.isdecimal(): + num = num * 10 + int(c) + else: + delta += timedelta(**{TIME_UNITS[c]: num}) + num = 0 + return delta def main(): registry = CollectorRegistry() - funcs = (zpool_metadata, zpool_info, dataset_metadata, dataset_metrics) + funcs = (zpool_metadata, zpool_info, dataset_metadata, dataset_metrics, zpool_status) with ThreadPoolExecutor(max_workers=len(funcs)) as executor: for func in funcs: executor.submit(func, registry) - print(generate_latest(registry).decode(), end="") + print(generate_latest(registry).decode(), end='') main()