From f445f4fce8839c67de363dd78080656ade7723d2 Mon Sep 17 00:00:00 2001
From: Boris Burkov
Date: Thu, 11 Jul 2024 14:18:24 -0700
Subject: [PATCH] btrfs-progs: btrfstune: add ability to remove squotas

When simple quotas is enabled, every new data extent gets a special
inline OWNER_REF item that identifies the owning subvolume. This makes
simple quotas backwards incompatible with kernels older than v6.7. Even
if you disable quotas on the filesystem, the OWNER_REF items are
sprinkled throughout the extent tree and older kernels are unable to
parse them.

However, it is relatively easy to simply walk the extent tree and remove
these inline ref items. This gives squota adopters the option to *fully*
disable squotas on their system and un-set the incompat bit. Add this
capability to btrfstune, which requires only a little tricky btrfs item
data shifting.

This functionality was tested with a new unit test, as well as a similar
but more thorough integration test in fstests.

Reviewed-by: Anand Jain
Reviewed-by: Qu Wenruo
Signed-off-by: Boris Burkov
---
 .../065-btrfstune-simple-quota/test.sh        |  33 +++
 tune/main.c                                   |  16 +-
 tune/quota.c                                  | 206 ++++++++++++++++++
 tune/tune.h                                   |   1 +
 4 files changed, 255 insertions(+), 1 deletion(-)
 create mode 100755 tests/misc-tests/065-btrfstune-simple-quota/test.sh

diff --git a/tests/misc-tests/065-btrfstune-simple-quota/test.sh b/tests/misc-tests/065-btrfstune-simple-quota/test.sh
new file mode 100755
index 0000000000..d7ccaf4e96
--- /dev/null
+++ b/tests/misc-tests/065-btrfstune-simple-quota/test.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+# Verify btrfstune for enabling and removing simple quotas
+
+source "$TEST_TOP/common" || exit
+source "$TEST_TOP/common.convert" || exit
+
+check_experimental_build
+setup_root_helper
+prepare_test_dev
+
+# Create the fs without simple quota
+run_check_mkfs_test_dev
+run_check_mount_test_dev
+populate_fs
+run_check_umount_test_dev
+# Enable simple quotas
+run_check $SUDO_HELPER "$TOP/btrfstune" --enable-simple-quota "$TEST_DEV"
+run_check_mount_test_dev
+run_check $SUDO_HELPER dd if=/dev/zero of="$TEST_MNT"/file2 bs=1M count=1
+run_check_umount_test_dev
+run_check $SUDO_HELPER "$TOP/btrfs" check "$TEST_DEV"
+
+# Populate new fs with simple quotas enabled
+run_check_mkfs_test_dev -O squota
+run_check_mount_test_dev
+populate_fs
+run_check_umount_test_dev
+# Remove simple quotas
+run_check $SUDO_HELPER "$TOP/btrfstune" --remove-simple-quota "$TEST_DEV"
+run_check_mount_test_dev
+run_check $SUDO_HELPER dd if=/dev/zero of="$TEST_MNT"/file3 bs=1M count=1
+run_check_umount_test_dev
+run_check $SUDO_HELPER "$TOP/btrfs" check "$TEST_DEV"
diff --git a/tune/main.c b/tune/main.c
index 45417fb978..91e70eeb62 100644
--- a/tune/main.c
+++ b/tune/main.c
@@ -104,6 +104,7 @@ static const char * const tune_usage[] = {
 	OPTLINE("-n", "enable no-holes feature (mkfs: no-holes, more efficient sparse file representation)"),
 	OPTLINE("-S <0|1>", "set/unset seeding status of a device"),
 	OPTLINE("--enable-simple-quota", "enable simple quotas on the file system. (mkfs: squota)"),
+	OPTLINE("--remove-simple-quota", "remove simple quotas from the file system."),
 	OPTLINE("--convert-to-block-group-tree", "convert filesystem to track block groups in "
 		"the separate block-group-tree instead of extent tree (sets the incompat bit)"),
 	OPTLINE("--convert-from-block-group-tree",
@@ -198,6 +199,7 @@ int BOX_MAIN(btrfstune)(int argc, char *argv[])
 	int ret;
 	u64 super_flags = 0;
 	int quota = 0;
+	int remove_simple_quota = 0;
 	int fd = -1;
 	int oflags = O_RDWR;
 
@@ -209,7 +211,7 @@ int BOX_MAIN(btrfstune)(int argc, char *argv[])
 			GETOPT_VAL_DISABLE_BLOCK_GROUP_TREE,
 			GETOPT_VAL_ENABLE_FREE_SPACE_TREE,
 			GETOPT_VAL_ENABLE_SIMPLE_QUOTA,
-
+			GETOPT_VAL_REMOVE_SIMPLE_QUOTA,
 		};
 		static const struct option long_options[] = {
 			{ "help", no_argument, NULL, GETOPT_VAL_HELP},
@@ -221,6 +223,8 @@ int BOX_MAIN(btrfstune)(int argc, char *argv[])
 				GETOPT_VAL_ENABLE_FREE_SPACE_TREE},
 			{ "enable-simple-quota", no_argument, NULL,
 				GETOPT_VAL_ENABLE_SIMPLE_QUOTA },
+			{ "remove-simple-quota", no_argument, NULL,
+				GETOPT_VAL_REMOVE_SIMPLE_QUOTA},
 #if EXPERIMENTAL
 			{ "csum", required_argument, NULL, GETOPT_VAL_CSUM },
 #endif
@@ -288,6 +292,10 @@ int BOX_MAIN(btrfstune)(int argc, char *argv[])
 			quota = 1;
 			btrfstune_cmd_groups[QGROUP] = true;
 			break;
+		case GETOPT_VAL_REMOVE_SIMPLE_QUOTA:
+			remove_simple_quota = 1;
+			btrfstune_cmd_groups[QGROUP] = true;
+			break;
 #if EXPERIMENTAL
 		case GETOPT_VAL_CSUM:
 			btrfs_warn_experimental(
@@ -535,6 +543,12 @@ int BOX_MAIN(btrfstune)(int argc, char *argv[])
 			goto out;
 	}
 
+	if (remove_simple_quota) {
+		ret = remove_squota(root->fs_info);
+		if (ret)
+			goto out;
+	}
+
 out:
 	if (ret < 0) {
 		fs_info->readonly = 1;
diff --git a/tune/quota.c b/tune/quota.c
index a14f453078..16b2b3fb6c 100644
--- a/tune/quota.c
+++ b/tune/quota.c
@@ -6,6 +6,212 @@
 #include "common/messages.h"
 #include "tune/tune.h"
 
+/*
+ * Delete the quota tree, if there is one, and clear the SIMPLE_QUOTA incompat
+ * bit in the super block. Returns 0 on success or a negative errno.
+ */
+static int remove_quota_tree(struct btrfs_fs_info *fs_info)
+{
+	int ret;
+	struct btrfs_root *quota_root = fs_info->quota_root;
+	struct btrfs_root *tree_root = fs_info->tree_root;
+	struct btrfs_super_block *sb = fs_info->super_copy;
+	/* Incompat flags are 64 bits wide, an int would truncate them. */
+	u64 super_flags = btrfs_super_incompat_flags(sb);
+	struct btrfs_trans_handle *trans;
+
+	/*
+	 * With no quota tree there is nothing to delete, but the incompat bit
+	 * still has to be cleared, so run the transaction on the tree root.
+	 */
+	trans = btrfs_start_transaction(quota_root ? quota_root : tree_root, 0);
+	if (IS_ERR(trans)) {
+		ret = PTR_ERR(trans);
+		errno = -ret;
+		error_msg(ERROR_MSG_START_TRANS, "%m");
+		return ret;
+	}
+
+	if (quota_root) {
+		ret = btrfs_clear_tree(trans, quota_root);
+		if (ret) {
+			btrfs_abort_transaction(trans, ret);
+			return ret;
+		}
+
+		ret = btrfs_delete_and_free_root(trans, quota_root);
+		if (ret) {
+			btrfs_abort_transaction(trans, ret);
+			return ret;
+		}
+		fs_info->quota_root = NULL;
+	}
+	super_flags &= ~BTRFS_FEATURE_INCOMPAT_SIMPLE_QUOTA;
+	btrfs_set_super_incompat_flags(sb, super_flags);
+	ret = btrfs_commit_transaction(trans, tree_root);
+	if (ret < 0) {
+		errno = -ret;
+		error_msg(ERROR_MSG_COMMIT_TRANS, "%m");
+		return ret;
+	}
+	return 0;
+}
+
+/*
+ * Given a pointer (ptr) into DATAi (i = slot), and an amount to shift,
+ * move all the data to the left (slots >= slot) of that ptr to the right by
+ * the shift amount. This overwrites the shift bytes after ptr, effectively
+ * removing them from the item data. We must update affected item sizes (only
+ * at slot) and offsets (slots >= slot).
+ *
+ * Leaf view, using '-' to show shift scale:
+ * Before:
+ * [ITEM0,...,ITEMi,...,ITEMn,-------,DATAn,...,[---DATAi---],...,DATA0]
+ * After:
+ * [ITEM0,...,ITEMi,...,ITEMn,--------,DATAn,...,[--DATAi---],...,DATA0]
+ *
+ * Zooming in on DATAi
+ * (ptr points at the start of the Ys, and shift is length of the Ys)
+ * Before:
+ * ...[DATAi+1][XXXXXXXXXXXXYYYYYYYYYYYYYYYYXXXXXXX][DATAi-1]...
+ * After:
+ * ...................[DATAi+1][XXXXXXXXXXXXXXXXXXX][DATAi-1]...
+ * Note that DATAi-1 and smaller are not affected.
+ */
+static void shift_leaf_data(struct btrfs_trans_handle *trans,
+			    struct extent_buffer *leaf, int slot,
+			    unsigned long ptr, u32 shift)
+{
+	u32 nr = btrfs_header_nritems(leaf);
+	u32 leaf_data_off = btrfs_item_ptr_offset(leaf, nr - 1);
+	u32 len = ptr - leaf_data_off;
+	u32 new_size = btrfs_item_size(leaf, slot) - shift;
+	for (int i = slot; i < nr; i++) {
+		u32 old_item_offset = btrfs_item_offset(leaf, i);
+		btrfs_set_item_offset(leaf, i, old_item_offset + shift);
+	}
+	memmove_extent_buffer(leaf, leaf_data_off + shift, leaf_data_off, len);
+	btrfs_set_item_size(leaf, slot, new_size);
+	btrfs_set_header_generation(leaf, trans->transid);
+	btrfs_mark_buffer_dirty(leaf);
+}
+
+/*
+ * Iterate over the extent tree and for each EXTENT_ITEM item that has an inline
+ * ref of type OWNER_REF, shift that leaf to eliminate the owner ref.
+ *
+ * Note: we use a search_slot per leaf rather than find_next_leaf to get the
+ * needed CoW-ing and rebalancing for each leaf and its path up to the root.
+ *
+ * Returns 0 on success or a negative errno; on failure the transaction is
+ * aborted and the path released.
+ */
+static int remove_owner_refs(struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_root *extent_root;
+	struct btrfs_key key;
+	struct extent_buffer *leaf;
+	struct btrfs_path path = { 0 };
+	int slot;
+	int ret;
+
+	extent_root = btrfs_extent_root(fs_info, 0);
+
+	trans = btrfs_start_transaction(extent_root, 0);
+	if (IS_ERR(trans)) {
+		ret = PTR_ERR(trans);
+		errno = -ret;
+		error_msg(ERROR_MSG_START_TRANS, "%m");
+		return ret;
+	}
+
+	key.objectid = 0;
+	key.type = BTRFS_EXTENT_ITEM_KEY;
+	key.offset = 0;
+
+search_slot:
+	ret = btrfs_search_slot(trans, extent_root, &key, &path, 1, 1);
+	if (ret < 0)
+		goto out_abort;
+	leaf = path.nodes[0];
+	slot = path.slots[0];
+
+	while (1) {
+		struct btrfs_key found_key;
+		struct btrfs_extent_item *ei;
+		struct btrfs_extent_inline_ref *iref;
+		u8 type;
+		unsigned long ptr;
+		unsigned long item_end;
+
+		if (slot >= btrfs_header_nritems(leaf)) {
+			ret = btrfs_next_leaf(extent_root, &path);
+			if (ret < 0)
+				goto out_abort;
+			/* No more leaves, the walk is complete. */
+			if (ret > 0)
+				break;
+			leaf = path.nodes[0];
+			slot = path.slots[0];
+			btrfs_item_key_to_cpu(leaf, &key, slot);
+			btrfs_release_path(&path);
+			goto search_slot;
+		}
+
+		btrfs_item_key_to_cpu(leaf, &found_key, slot);
+		if (found_key.type != BTRFS_EXTENT_ITEM_KEY)
+			goto next;
+		ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
+		/*
+		 * Only data extents get an owner ref; what follows the extent
+		 * item of a tree block is not guaranteed to be an inline ref.
+		 */
+		if (!(btrfs_extent_flags(leaf, ei) & BTRFS_EXTENT_FLAG_DATA))
+			goto next;
+		ptr = (unsigned long)(ei + 1);
+		item_end = (unsigned long)ei + btrfs_item_size(leaf, slot);
+		/* No inline extent references; accessing type is invalid. */
+		if (ptr >= item_end)
+			goto next;
+		iref = (struct btrfs_extent_inline_ref *)ptr;
+		type = btrfs_extent_inline_ref_type(leaf, iref);
+		if (type == BTRFS_EXTENT_OWNER_REF_KEY)
+			shift_leaf_data(trans, leaf, slot, ptr, sizeof(*iref));
+next:
+		slot++;
+	}
+	btrfs_release_path(&path);
+
+	ret = btrfs_commit_transaction(trans, extent_root);
+	if (ret < 0) {
+		errno = -ret;
+		error_msg(ERROR_MSG_COMMIT_TRANS, "%m");
+		return ret;
+	}
+	return 0;
+
+out_abort:
+	btrfs_release_path(&path);
+	btrfs_abort_transaction(trans, ret);
+	return ret;
+}
+
+/*
+ * Fully remove simple quotas: strip the inline owner refs from the extent
+ * tree, then delete the quota tree and clear the incompat bit.
+ */
+int remove_squota(struct btrfs_fs_info *fs_info)
+{
+	int ret;
+
+	ret = remove_owner_refs(fs_info);
+	if (ret)
+		return ret;
+
+	return remove_quota_tree(fs_info);
+}
+
 static int create_qgroup(struct btrfs_fs_info *fs_info,
 			 struct btrfs_trans_handle *trans,
 			 u64 qgroupid)
diff --git a/tune/tune.h b/tune/tune.h
index 397cfe4f34..a41ba78b73 100644
--- a/tune/tune.h
+++ b/tune/tune.h
@@ -33,5 +33,6 @@ int convert_to_extent_tree(struct btrfs_fs_info *fs_info);
 int btrfs_change_csum_type(struct btrfs_fs_info *fs_info, u16 new_csum_type);
 
 int enable_quota(struct btrfs_fs_info *fs_info, bool simple);
+int remove_squota(struct btrfs_fs_info *fs_info);
 
 #endif