From 0237a900487347a9e28ebe236b8c9d883bfe6762 Mon Sep 17 00:00:00 2001
From: Paul Dagnelie
Date: Mon, 19 May 2025 15:36:50 -0700
Subject: [PATCH 01/21] Refactor mirror map into header for use in other code

Sponsored-by: Eshtek, creators of HexOS
Sponsored-by: Klara, Inc.
---
 include/Makefile.am       |  1 +
 include/sys/vdev_mirror.h | 73 +++++++++++++++++++++++++++++++++++++++
 module/zfs/vdev_mirror.c  | 68 ++++++++++++++----------------------
 3 files changed, 99 insertions(+), 43 deletions(-)
 create mode 100644 include/sys/vdev_mirror.h

diff --git a/include/Makefile.am b/include/Makefile.am
index 7588cd0aedc9..3312dd28c1c5 100644
--- a/include/Makefile.am
+++ b/include/Makefile.am
@@ -107,6 +107,7 @@ COMMON_H = \
 	sys/vdev_indirect_births.h \
 	sys/vdev_indirect_mapping.h \
 	sys/vdev_initialize.h \
+	sys/vdev_mirror.h \
 	sys/vdev_raidz.h \
 	sys/vdev_raidz_impl.h \
 	sys/vdev_rebuild.h \
diff --git a/include/sys/vdev_mirror.h b/include/sys/vdev_mirror.h
new file mode 100644
index 000000000000..0057d6cbfdba
--- /dev/null
+++ b/include/sys/vdev_mirror.h
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: CDDL-1.0
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or https://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2025, Klara Inc.
+ */
+
+#ifndef _SYS_VDEV_MIRROR_H
+#define _SYS_VDEV_MIRROR_H
+
+#include <sys/spa.h>
+#include <sys/vdev.h>
+#include <sys/zio.h>
+#include <sys/abd.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Virtual device vector for mirroring.
+ */
+typedef struct mirror_child {
+	vdev_t		*mc_vd;
+	abd_t		*mc_abd;
+	uint64_t	mc_offset;
+	int		mc_error;
+	int		mc_load;
+	uint8_t		mc_tried;
+	uint8_t		mc_skipped;
+	uint8_t		mc_speculative;
+	uint8_t		mc_rebuilding;
+} mirror_child_t;
+
+typedef struct mirror_map {
+	int		*mm_preferred;
+	int		mm_preferred_cnt;
+	int		mm_children;
+	boolean_t	mm_resilvering;
+	boolean_t	mm_rebuilding;
+	boolean_t	mm_root;
+	mirror_child_t	mm_child[];
+} mirror_map_t;
+
+mirror_map_t *vdev_mirror_map_alloc(int children, boolean_t resilvering,
+    boolean_t root);
+void vdev_mirror_io_start_impl(zio_t *zio, mirror_map_t *mm);
+void vdev_mirror_io_done(zio_t *zio);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_VDEV_MIRROR_H */
diff --git a/module/zfs/vdev_mirror.c b/module/zfs/vdev_mirror.c
index 18efdaac006f..7b645405ab82 100644
--- a/module/zfs/vdev_mirror.c
+++ b/module/zfs/vdev_mirror.c
@@ -39,6 +39,7 @@
 #include <sys/zio_checksum.h>
 #include <sys/abd.h>
 #include <sys/fs/zfs.h>
+#include <sys/vdev_mirror.h>
 
 /*
  * Vdev mirror kstats
 */
@@ -99,31 +100,6 @@ vdev_mirror_stat_fini(void)
 	}
 }
 
-/*
- * Virtual device vector for mirroring.
- */ -typedef struct mirror_child { - vdev_t *mc_vd; - abd_t *mc_abd; - uint64_t mc_offset; - int mc_error; - int mc_load; - uint8_t mc_tried; - uint8_t mc_skipped; - uint8_t mc_speculative; - uint8_t mc_rebuilding; -} mirror_child_t; - -typedef struct mirror_map { - int *mm_preferred; - int mm_preferred_cnt; - int mm_children; - boolean_t mm_resilvering; - boolean_t mm_rebuilding; - boolean_t mm_root; - mirror_child_t mm_child[]; -} mirror_map_t; - static const int vdev_mirror_shift = 21; /* @@ -152,7 +128,7 @@ vdev_mirror_map_size(int children) sizeof (int) * children); } -static inline mirror_map_t * +mirror_map_t * vdev_mirror_map_alloc(int children, boolean_t resilvering, boolean_t root) { mirror_map_t *mm; @@ -175,7 +151,7 @@ vdev_mirror_map_free(zio_t *zio) kmem_free(mm, vdev_mirror_map_size(mm->mm_children)); } -static const zio_vsd_ops_t vdev_mirror_vsd_ops = { +zio_vsd_ops_t vdev_mirror_vsd_ops = { .vsd_free = vdev_mirror_map_free, }; @@ -601,24 +577,12 @@ vdev_mirror_child_select(zio_t *zio) return (-1); } -static void -vdev_mirror_io_start(zio_t *zio) +void +vdev_mirror_io_start_impl(zio_t *zio, mirror_map_t *mm) { - mirror_map_t *mm; mirror_child_t *mc; int c, children; - mm = vdev_mirror_map_init(zio); - zio->io_vsd = mm; - zio->io_vsd_ops = &vdev_mirror_vsd_ops; - - if (mm == NULL) { - ASSERT(!spa_trust_config(zio->io_spa)); - ASSERT(zio->io_type == ZIO_TYPE_READ); - zio_execute(zio); - return; - } - if (zio->io_type == ZIO_TYPE_READ) { if ((zio->io_flags & ZIO_FLAG_SCRUB) && !mm->mm_resilvering) { /* @@ -650,7 +614,6 @@ vdev_mirror_io_start(zio_t *zio) vdev_mirror_child_done, mc)); first = B_FALSE; } - zio_execute(zio); return; } /* @@ -690,6 +653,25 @@ vdev_mirror_io_start(zio_t *zio) zio->io_type, zio->io_priority, 0, vdev_mirror_child_done, mc)); } +} + +static void +vdev_mirror_io_start(zio_t *zio) +{ + mirror_map_t *mm; + + mm = vdev_mirror_map_init(zio); + zio->io_vsd = mm; + zio->io_vsd_ops = &vdev_mirror_vsd_ops; + + if (mm == NULL) { + ASSERT(!spa_trust_config(zio->io_spa)); + ASSERT(zio->io_type == ZIO_TYPE_READ); + zio_execute(zio); + return; + } + + vdev_mirror_io_start_impl(zio, mm); zio_execute(zio); } @@ -708,7 +690,7 @@ vdev_mirror_worst_error(mirror_map_t *mm) return (error[0] ? error[0] : error[1]); } -static void +void vdev_mirror_io_done(zio_t *zio) { mirror_map_t *mm = zio->io_vsd; From 023b06134032d9b0da4e3ee647793daa80f414d6 Mon Sep 17 00:00:00 2001 From: Paul Dagnelie Date: Mon, 19 May 2025 15:55:44 -0700 Subject: [PATCH 02/21] Add sync_extra logic for anyraid to use Sponsored-by: Eshtek, creators of HexOS Sponsored-by: Klara, Inc. 
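[Review note: the point of patch 01 above is that the mirror I/O engine is
now callable from outside vdev_mirror.c. A minimal sketch of how a
hypothetical consumer (for example, a new vdev type) might drive it, modeled
on the body of vdev_mirror_io_start(); the function name is invented for
illustration and the child-map population step is elided:

	static void
	example_vdev_io_start(zio_t *zio)
	{
		/* Two copies; not resilvering; not the root vdev. */
		mirror_map_t *mm = vdev_mirror_map_alloc(2, B_FALSE, B_FALSE);

		zio->io_vsd = mm;
		/* The vsd ops free the map when the zio completes. */
		zio->io_vsd_ops = &vdev_mirror_vsd_ops;

		/* Caller fills in mm->mm_child[i].mc_vd and mc_offset here. */

		vdev_mirror_io_start_impl(zio, mm);	/* issue child I/Os */
		zio_execute(zio);	/* the impl no longer calls this itself */
	}
]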
--- include/sys/vdev.h | 10 +++++- module/zfs/spa.c | 14 ++++++-- module/zfs/vdev_label.c | 76 ++++++++++++++++++++++++++++++++++++++++- 3 files changed, 95 insertions(+), 5 deletions(-) diff --git a/include/sys/vdev.h b/include/sys/vdev.h index 510474d6c085..7a5d4f6a5e6f 100644 --- a/include/sys/vdev.h +++ b/include/sys/vdev.h @@ -191,9 +191,17 @@ extern uint64_t vdev_queue_last_offset(vdev_t *vd); extern uint64_t vdev_queue_class_length(vdev_t *vq, zio_priority_t p); extern boolean_t vdev_queue_pool_busy(spa_t *spa); +typedef enum vdev_config_sync_status { + VDEV_CONFIG_NORMAL, + VDEV_CONFIG_CREATING_CHECKPOINT, + VDEV_CONFIG_DISCARDING_CHECKPOINT, + VDEV_CONFIG_REWINDING_CHECKPOINT +} vdev_config_sync_status_t; + extern void vdev_config_dirty(vdev_t *vd); extern void vdev_config_clean(vdev_t *vd); -extern int vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg); +extern int vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg, + vdev_config_sync_status_t status); extern void vdev_state_dirty(vdev_t *vd); extern void vdev_state_clean(vdev_t *vd); diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 34de3f1d9525..957370afb461 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -5499,7 +5499,8 @@ spa_ld_checkpoint_rewind(spa_t *spa) if (svdcount == SPA_SYNC_MIN_VDEVS) break; } - error = vdev_config_sync(svd, svdcount, spa->spa_first_txg); + error = vdev_config_sync(svd, svdcount, spa->spa_first_txg, + VDEV_CONFIG_REWINDING_CHECKPOINT); if (error == 0) spa->spa_last_synced_guid = rvd->vdev_guid; spa_config_exit(spa, SCL_ALL, FTAG); @@ -10329,6 +10330,13 @@ spa_sync_rewrite_vdev_config(spa_t *spa, dmu_tx_t *tx) { vdev_t *rvd = spa->spa_root_vdev; uint64_t txg = tx->tx_txg; + vdev_config_sync_status_t status; + if (dmu_tx_get_txg(tx) == spa->spa_checkpoint_txg + 1) + status = VDEV_CONFIG_CREATING_CHECKPOINT; + else if (spa->spa_checkpoint_txg == 0) + status = VDEV_CONFIG_DISCARDING_CHECKPOINT; + else + status = VDEV_CONFIG_NORMAL; for (;;) { int error = 0; @@ -10362,10 +10370,10 @@ spa_sync_rewrite_vdev_config(spa_t *spa, dmu_tx_t *tx) if (svdcount == SPA_SYNC_MIN_VDEVS) break; } - error = vdev_config_sync(svd, svdcount, txg); + error = vdev_config_sync(svd, svdcount, txg, status); } else { error = vdev_config_sync(rvd->vdev_child, - rvd->vdev_children, txg); + rvd->vdev_children, txg, status); } if (error == 0) diff --git a/module/zfs/vdev_label.c b/module/zfs/vdev_label.c index 7e222eac5edc..bb2a6baff863 100644 --- a/module/zfs/vdev_label.c +++ b/module/zfs/vdev_label.c @@ -1851,6 +1851,69 @@ vdev_uberblock_sync_list(vdev_t **svd, int svdcount, uberblock_t *ub, int flags) return (good_writes >= 1 ? 0 : EIO); } +/* + * Write the extra data of the specified vdev. 
+ */ +static void +vdev_extra_sync(zio_t *zio, uint64_t *good_writes, vdev_t *vd, int flags, + uint64_t txg, vdev_config_sync_status_t status) +{ + for (uint64_t c = 0; c < vd->vdev_children; c++) { + vdev_extra_sync(zio, good_writes, vd->vdev_child[c], flags, txg, + status); + } + + if (!vd->vdev_ops->vdev_op_leaf) + return; + + if (!vdev_writeable(vd)) + return; + + // TODO Invoke extra sync logic for anyraid +} + +/* Sync the extra data of all vdevs in svd[] */ +static int +vdev_extra_sync_list(vdev_t **svd, int svdcount, int flags, uint64_t txg, + vdev_config_sync_status_t status) +{ + spa_t *spa = svd[0]->vdev_spa; + zio_t *zio; + uint64_t good_writes = 0; + + boolean_t have_extra = B_FALSE; + + for (int i = 0; i < svdcount; i++) { + // TODO use this for anyraid + } + if (!have_extra) + return (0); + + zio = zio_root(spa, NULL, NULL, flags); + + for (int v = 0; v < svdcount; v++) + vdev_extra_sync(zio, &good_writes, svd[v], flags, txg, status); + + (void) zio_wait(zio); + + /* + * Flush the extra data to disk. This ensures that the odd labels + * are no longer needed (because the new uberblocks and the even + * labels are safely on disk), so it is safe to overwrite them. + */ + zio = zio_root(spa, NULL, NULL, flags); + + for (int v = 0; v < svdcount; v++) { + if (vdev_writeable(svd[v])) { + zio_flush(zio, svd[v]); + } + } + + (void) zio_wait(zio); + + return (good_writes >= 1 ? 0 : EIO); +} + /* * On success, increment the count of good writes for our top-level vdev. */ @@ -2034,7 +2097,8 @@ vdev_label_sync_list(spa_t *spa, int l, uint64_t txg, int flags) * at any time, you can just call it again, and it will resume its work. */ int -vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg) +vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg, + vdev_config_sync_status_t status) { spa_t *spa = svd[0]->vdev_spa; uberblock_t *ub = &spa->spa_uberblock; @@ -2112,6 +2176,16 @@ vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg) goto retry; } + if ((error = vdev_extra_sync_list(svd, svdcount, flags, txg, status) != + 0)) { + if ((flags & ZIO_FLAG_TRYHARD) != 0) { + zfs_dbgmsg("vdev_extra_sync_list() returned error %d " + "for pool '%s' when syncing out the extra data " + "of dirty vdevs", error, spa_name(spa)); + } + goto retry; + } + /* * Sync the uberblocks to all vdevs in svd[]. * If the system dies in the middle of this step, there are two cases From bf115b9f95bfbf844b7802fdc2552073918aac31 Mon Sep 17 00:00:00 2001 From: Paul Dagnelie Date: Mon, 19 May 2025 16:06:50 -0700 Subject: [PATCH 03/21] Add weight biasing to segment based metaslabs Sponsored-by: Eshtek, creators of HexOS Sponsored-by: Klara, Inc. 
--- include/sys/metaslab_impl.h | 2 ++ module/zfs/metaslab.c | 12 ++++++++++++ 2 files changed, 14 insertions(+) diff --git a/include/sys/metaslab_impl.h b/include/sys/metaslab_impl.h index 6ce995d0a086..9b30e4721df6 100644 --- a/include/sys/metaslab_impl.h +++ b/include/sys/metaslab_impl.h @@ -82,6 +82,8 @@ typedef enum trace_alloc_type { (METASLAB_WEIGHT_PRIMARY | METASLAB_WEIGHT_SECONDARY | \ METASLAB_WEIGHT_CLAIM) +#define METASLAB_MAX_WEIGHT (METASLAB_WEIGHT_TYPE - 1) + /* * The metaslab weight is used to encode the amount of free space in a * metaslab, such that the "best" metaslab appears first when sorting the diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c index 3f649ffb44e4..34c39c846a94 100644 --- a/module/zfs/metaslab.c +++ b/module/zfs/metaslab.c @@ -3410,6 +3410,18 @@ metaslab_segment_weight(metaslab_t *msp) weight = metaslab_weight_from_spacemap(msp); } + /* + * Anyraid vdevs strongly prefer allocations from earlier regions, in + * order to prevent premature region placement. While this optimization + * is not usually good for segment-based weighting, we enable it for + * that case specifically. + */ + vdev_t *vd = mg->mg_vd; + if (B_FALSE) { + weight = 2 * weight - (msp->ms_id * weight) / vd->vdev_ms_count; + weight = MIN(weight, METASLAB_MAX_WEIGHT); + } + /* * If the metaslab was active the last time we calculated its weight * then keep it active. We want to consume the entire region that From 5e106d7cb2d203d7197e185085d26f05fc9c9121 Mon Sep 17 00:00:00 2001 From: Paul Dagnelie Date: Mon, 19 May 2025 16:14:28 -0700 Subject: [PATCH 04/21] Change vdev ops to support anyraid Sponsored-by: Eshtek, creators of HexOS Sponsored-by: Klara, Inc. --- include/sys/vdev_impl.h | 8 ++++++-- module/zfs/vdev.c | 9 ++++++--- module/zfs/vdev_draid.c | 9 +++++---- module/zfs/vdev_raidz.c | 7 ++++--- 4 files changed, 21 insertions(+), 12 deletions(-) diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h index afaa401343d9..1ee94a8cc5c0 100644 --- a/include/sys/vdev_impl.h +++ b/include/sys/vdev_impl.h @@ -70,7 +70,7 @@ typedef int vdev_open_func_t(vdev_t *vd, uint64_t *size, uint64_t *max_size, uint64_t *ashift, uint64_t *pshift); typedef void vdev_close_func_t(vdev_t *vd); typedef uint64_t vdev_asize_func_t(vdev_t *vd, uint64_t psize, uint64_t txg); -typedef uint64_t vdev_min_asize_func_t(vdev_t *vd); +typedef uint64_t vdev_min_asize_func_t(vdev_t *pvd, vdev_t *cvd); typedef uint64_t vdev_min_alloc_func_t(vdev_t *vd); typedef void vdev_io_start_func_t(zio_t *zio); typedef void vdev_io_done_func_t(zio_t *zio); @@ -94,6 +94,7 @@ typedef uint64_t vdev_rebuild_asize_func_t(vdev_t *vd, uint64_t start, uint64_t size, uint64_t max_segment); typedef void vdev_metaslab_init_func_t(vdev_t *vd, uint64_t *startp, uint64_t *sizep); +typedef void vdev_metaslab_size_func_t(vdev_t *vd, uint64_t *shiftp); typedef void vdev_config_generate_func_t(vdev_t *vd, nvlist_t *nv); typedef uint64_t vdev_nparity_func_t(vdev_t *vd); typedef uint64_t vdev_ndisks_func_t(vdev_t *vd); @@ -121,6 +122,7 @@ typedef const struct vdev_ops { vdev_nparity_func_t *vdev_op_nparity; vdev_ndisks_func_t *vdev_op_ndisks; vdev_kobj_post_evt_func_t *vdev_op_kobj_evt_post; + vdev_metaslab_size_func_t *vdev_op_metaslab_size; char vdev_op_type[16]; boolean_t vdev_op_leaf; } vdev_ops_t; @@ -618,6 +620,8 @@ extern vdev_ops_t vdev_hole_ops; extern vdev_ops_t vdev_spare_ops; extern vdev_ops_t vdev_indirect_ops; +extern zio_vsd_ops_t vdev_mirror_vsd_ops; + /* * Common size functions */ @@ -625,7 +629,7 @@ extern void 
vdev_default_xlate(vdev_t *vd, const zfs_range_seg64_t *logical_rs, zfs_range_seg64_t *physical_rs, zfs_range_seg64_t *remain_rs); extern uint64_t vdev_default_psize(vdev_t *vd, uint64_t asize, uint64_t txg); extern uint64_t vdev_default_asize(vdev_t *vd, uint64_t psize, uint64_t txg); -extern uint64_t vdev_default_min_asize(vdev_t *vd); +extern uint64_t vdev_default_min_asize(vdev_t *pvd, vdev_t *cvd); extern uint64_t vdev_get_min_asize(vdev_t *vd); extern void vdev_set_min_asize(vdev_t *vd); extern uint64_t vdev_get_nparity(vdev_t *vd); diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index 2754769eb759..beb29df2590f 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -377,9 +377,10 @@ vdev_default_asize(vdev_t *vd, uint64_t psize, uint64_t txg) } uint64_t -vdev_default_min_asize(vdev_t *vd) +vdev_default_min_asize(vdev_t *pvd, vdev_t *cvd) { - return (vd->vdev_min_asize); + (void) cvd; + return (pvd->vdev_min_asize); } /* @@ -408,7 +409,7 @@ vdev_get_min_asize(vdev_t *vd) return (P2ALIGN_TYPED(vd->vdev_asize, 1ULL << vd->vdev_ms_shift, uint64_t)); - return (pvd->vdev_ops->vdev_op_min_asize(pvd)); + return (pvd->vdev_ops->vdev_op_min_asize(pvd, vd)); } void @@ -3013,6 +3014,8 @@ vdev_metaslab_set_size(vdev_t *vd) if ((asize >> ms_shift) > zfs_vdev_ms_count_limit) ms_shift = highbit64(asize / zfs_vdev_ms_count_limit); } + if (vd->vdev_ops->vdev_op_metaslab_size) + vd->vdev_ops->vdev_op_metaslab_size(vd, &ms_shift); vd->vdev_ms_shift = ms_shift; ASSERT3U(vd->vdev_ms_shift, >=, SPA_MAXBLOCKSHIFT); diff --git a/module/zfs/vdev_draid.c b/module/zfs/vdev_draid.c index 8588cfee3f7d..f410bfb012ef 100644 --- a/module/zfs/vdev_draid.c +++ b/module/zfs/vdev_draid.c @@ -1164,14 +1164,15 @@ vdev_draid_get_astart(vdev_t *vd, const uint64_t start) * 1 / (children - nspares) of its asize. */ static uint64_t -vdev_draid_min_asize(vdev_t *vd) +vdev_draid_min_asize(vdev_t *pvd, vdev_t *cvd) { - vdev_draid_config_t *vdc = vd->vdev_tsd; + (void) cvd; + vdev_draid_config_t *vdc = pvd->vdev_tsd; - ASSERT3P(vd->vdev_ops, ==, &vdev_draid_ops); + ASSERT3P(pvd->vdev_ops, ==, &vdev_draid_ops); return (VDEV_DRAID_REFLOW_RESERVE + - (vd->vdev_min_asize + vdc->vdc_ndisks - 1) / (vdc->vdc_ndisks)); + (pvd->vdev_min_asize + vdc->vdc_ndisks - 1) / (vdc->vdc_ndisks)); } /* diff --git a/module/zfs/vdev_raidz.c b/module/zfs/vdev_raidz.c index 5fe70ec2b1d5..d4038982437d 100644 --- a/module/zfs/vdev_raidz.c +++ b/module/zfs/vdev_raidz.c @@ -2332,10 +2332,11 @@ vdev_raidz_psize_to_asize(vdev_t *vd, uint64_t psize, uint64_t txg) * so each child must provide at least 1/Nth of its asize. */ static uint64_t -vdev_raidz_min_asize(vdev_t *vd) +vdev_raidz_min_asize(vdev_t *pvd, vdev_t *cvd) { - return ((vd->vdev_min_asize + vd->vdev_children - 1) / - vd->vdev_children); + (void) cvd; + return ((pvd->vdev_min_asize + pvd->vdev_children - 1) / + pvd->vdev_children); } /* From 054add74e7d07c5f73d8d28c4b793fc19b8d985f Mon Sep 17 00:00:00 2001 From: Paul Dagnelie Date: Mon, 19 May 2025 16:36:16 -0700 Subject: [PATCH 05/21] New spa_misc functions for anyraid Sponsored-by: Eshtek, creators of HexOS Sponsored-by: Klara, Inc. 
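[Review note: patch 04 above makes two op-table changes that only pay off
later: vdev_op_min_asize now receives the child vdev, so a parent can
report a per-child minimum rather than one uniform value, and the new
optional vdev_op_metaslab_size hook lets a vdev type override the metaslab
shift chosen by vdev_metaslab_set_size(). A sketch of what an
implementation of the new hook could look like; the tile-alignment policy
and the constant are assumptions for illustration, not code from this
series:

	static void
	example_metaslab_size(vdev_t *vd, uint64_t *shiftp)
	{
		(void) vd;
		uint64_t tile_shift = 34;	/* hypothetical 16 GiB tiles */

		/* Keep each metaslab a whole number of tiles. */
		if (*shiftp < tile_shift)
			*shiftp = tile_shift;
	}
]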
--- include/sys/spa.h | 5 +++++ module/zfs/spa_misc.c | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/include/sys/spa.h b/include/sys/spa.h index 2a4cc60c4aa8..691a3287deb7 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -1083,9 +1083,12 @@ extern uint64_t spa_last_synced_txg(spa_t *spa); extern uint64_t spa_first_txg(spa_t *spa); extern uint64_t spa_syncing_txg(spa_t *spa); extern uint64_t spa_final_dirty_txg(spa_t *spa); +extern uint64_t spa_load_max_txg(spa_t *spa); +extern uint64_t spa_current_txg(spa_t *spa); extern uint64_t spa_version(spa_t *spa); extern pool_state_t spa_state(spa_t *spa); extern spa_load_state_t spa_load_state(spa_t *spa); +extern uint64_t spa_load_txg(spa_t *spa); extern uint64_t spa_freeze_txg(spa_t *spa); extern uint64_t spa_get_worst_case_asize(spa_t *spa, uint64_t lsize); extern void spa_get_min_alloc_range(spa_t *spa, uint64_t *min, uint64_t *max); @@ -1160,7 +1163,9 @@ extern boolean_t spa_has_pending_synctask(spa_t *spa); extern int spa_maxblocksize(spa_t *spa); extern int spa_maxdnodesize(spa_t *spa); extern boolean_t spa_has_checkpoint(spa_t *spa); +extern uint64_t spa_checkpoint_txg(spa_t *spa); extern boolean_t spa_importing_readonly_checkpoint(spa_t *spa); +extern boolean_t spa_importing_checkpoint(spa_t *spa); extern boolean_t spa_suspend_async_destroy(spa_t *spa); extern uint64_t spa_min_claim_txg(spa_t *spa); extern boolean_t zfs_dva_valid(spa_t *spa, const dva_t *dva, diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index bf22d2eb68e7..5aafdca5eb0d 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -1875,6 +1875,18 @@ spa_syncing_txg(spa_t *spa) return (spa->spa_syncing_txg); } +uint64_t +spa_load_max_txg(spa_t *spa) +{ + return (spa->spa_load_max_txg); +} + +uint64_t +spa_current_txg(spa_t *spa) +{ + return (spa->spa_uberblock.ub_txg); +} + /* * Return the last txg where data can be dirtied. The final txgs * will be used to just clear out any deferred frees that remain. 
@@ -2323,6 +2335,12 @@ spa_dirty_data(spa_t *spa) return (spa->spa_dsl_pool->dp_dirty_total); } +uint64_t +spa_load_txg(spa_t *spa) +{ + return (spa->spa_load_txg); +} + /* * ========================================================================== * SPA Import Progress Routines @@ -3042,6 +3060,12 @@ spa_has_checkpoint(spa_t *spa) return (spa->spa_checkpoint_txg != 0); } +uint64_t +spa_checkpoint_txg(spa_t *spa) +{ + return (spa->spa_checkpoint_txg); +} + boolean_t spa_importing_readonly_checkpoint(spa_t *spa) { @@ -3049,6 +3073,13 @@ spa_importing_readonly_checkpoint(spa_t *spa) spa->spa_mode == SPA_MODE_READ); } +boolean_t +spa_importing_checkpoint(spa_t *spa) +{ + return ((spa->spa_import_flags & ZFS_IMPORT_CHECKPOINT) && + spa->spa_uberblock.ub_checkpoint_txg != 0); +} + uint64_t spa_min_claim_txg(spa_t *spa) { @@ -3148,9 +3179,12 @@ EXPORT_SYMBOL(spa_guid); EXPORT_SYMBOL(spa_last_synced_txg); EXPORT_SYMBOL(spa_first_txg); EXPORT_SYMBOL(spa_syncing_txg); +EXPORT_SYMBOL(spa_load_max_txg); +EXPORT_SYMBOL(spa_current_txg); EXPORT_SYMBOL(spa_version); EXPORT_SYMBOL(spa_state); EXPORT_SYMBOL(spa_load_state); +EXPORT_SYMBOL(spa_load_txg); EXPORT_SYMBOL(spa_freeze_txg); EXPORT_SYMBOL(spa_get_min_alloc_range); /* for Lustre */ EXPORT_SYMBOL(spa_get_dspace); @@ -3193,8 +3227,10 @@ EXPORT_SYMBOL(spa_missing_tvds_allowed); EXPORT_SYMBOL(spa_set_missing_tvds); EXPORT_SYMBOL(spa_state_to_name); EXPORT_SYMBOL(spa_importing_readonly_checkpoint); +EXPORT_SYMBOL(spa_importing_checkpoint); EXPORT_SYMBOL(spa_min_claim_txg); EXPORT_SYMBOL(spa_suspend_async_destroy); +EXPORT_SYMBOL(spa_checkpoint_txg); EXPORT_SYMBOL(spa_has_checkpoint); EXPORT_SYMBOL(spa_top_vdevs_spacemap_addressable); From 5a5174e3d1fb96cfcdd7387daa7d9e87fb5f9b64 Mon Sep 17 00:00:00 2001 From: Paul Dagnelie Date: Mon, 19 May 2025 16:38:27 -0700 Subject: [PATCH 06/21] Anyraid implementation Signed-off-by: Paul Dagnelie Sponsored-by: Eshtek, creators of HexOS Sponsored-by: Klara, Inc. 
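[Review note: the heart of patch 06 is a map from logical tile ids to one
or more (disk, offset) pairs, kept in the vd_tile_map AVL tree declared in
include/sys/vdev_anyraid.h below. A sketch of the address translation this
implies; the function is illustrative, not a copy of the driver code:

	static anyraid_tile_t *
	example_tile_lookup(vdev_anyraid_t *var, uint64_t io_offset,
	    uint64_t *tile_offset)
	{
		anyraid_tile_t search;

		/* Split the offset into a tile id and an intra-tile offset. */
		search.at_tile_id = io_offset / var->vd_tile_size;
		*tile_offset = io_offset % var->vd_tile_size;

		/* Each tile's at_list holds one node per parity copy. */
		return (avl_find(&var->vd_tile_map, &search, NULL));
	}
]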
--- cmd/zdb/zdb.c | 490 +++++- cmd/zpool/zpool_vdev.c | 86 +- cmd/ztest.c | 85 +- include/Makefile.am | 1 + include/os/linux/kernel/linux/mod_compat.h | 1 + include/sys/fs/zfs.h | 6 + include/sys/vdev.h | 4 +- include/sys/vdev_anyraid.h | 270 +++ include/sys/vdev_impl.h | 1 + include/sys/vdev_mirror.h | 5 + include/sys/zio.h | 38 +- include/sys/zio_checksum.h | 4 +- include/zfeature_common.h | 1 + lib/libzfs/libzfs.abi | 16 +- lib/libzfs/libzfs_pool.c | 43 +- lib/libzpool/Makefile.am | 1 + man/man8/zdb.8 | 5 + module/Kbuild.in | 1 + module/Makefile.bsd | 1 + module/os/freebsd/zfs/sysctl_os.c | 2 + module/zcommon/zfeature_common.c | 4 + module/zfs/arc.c | 5 +- module/zfs/dmu.c | 7 +- module/zfs/metaslab.c | 47 +- module/zfs/spa.c | 67 +- module/zfs/spa_misc.c | 2 - module/zfs/vdev.c | 8 + module/zfs/vdev_anyraid.c | 1517 +++++++++++++++++ module/zfs/vdev_label.c | 11 +- module/zfs/zio.c | 11 +- module/zfs/zio_checksum.c | 35 +- tests/runfiles/common.run | 16 +- tests/runfiles/sanity.run | 3 +- tests/zfs-tests/include/default.cfg.in | 4 + tests/zfs-tests/include/libtest.shlib | 26 +- tests/zfs-tests/include/tunables.cfg | 1 + tests/zfs-tests/tests/Makefile.am | 21 + .../functional/anyraid/anyraid_checkpoint.ksh | 64 + .../anyraid/anyraid_clean_mirror_001_pos.ksh | 55 + .../anyraid/anyraid_clean_mirror_002_pos.ksh | 66 + .../anyraid/anyraid_clean_mirror_003_pos.ksh | 62 + .../functional/anyraid/anyraid_common.kshlib | 99 ++ ...nyraid_faildisk_write_replace_resilver.ksh | 90 + .../anyraid_offline_write_online_resilver.ksh | 128 ++ .../anyraid/anyraid_special_vdev_001_pos.ksh | 72 + .../anyraid/anyraid_special_vdev_002_pos.ksh | 72 + .../anyraid/anyraid_tile_layout.ksh | 70 + .../tests/functional/anyraid/cleanup.ksh | 34 + .../tests/functional/anyraid/default.cfg | 32 + .../tests/functional/anyraid/setup.ksh | 36 + .../cli_root/zfs_mount/zfs_mount.kshlib | 1 + .../functional/cli_root/zpool_add/cleanup.ksh | 2 + .../cli_root/zpool_add/zpool_add_001_pos.ksh | 23 +- .../cli_root/zpool_add/zpool_add_009_neg.ksh | 16 +- .../zpool_attach/zpool_attach_002_pos.ksh | 70 + .../zpool_attach/zpool_attach_003_pos.ksh | 99 ++ .../cli_root/zpool_create/cleanup.ksh | 2 + .../cli_root/zpool_create/zpool_create.shlib | 2 +- .../zpool_create/zpool_create_001_pos.ksh | 15 +- .../zpool_create/zpool_create_005_pos.ksh | 4 +- .../zpool_create/zpool_create_006_pos.ksh | 11 +- .../zpool_create/zpool_create_007_neg.ksh | 5 + .../zpool_create/zpool_create_009_neg.ksh | 14 +- .../zpool_create/zpool_create_010_neg.ksh | 1 + .../zpool_create_anyraid_001_pos.ksh | 63 + .../zpool_create_anyraid_002_pos.ksh | 69 + .../zpool_create_anyraid_003_pos.ksh | 61 + .../cli_root/zpool_export/cleanup.ksh | 1 + .../zpool_export_anyraid_001_pos.ksh | 61 + .../cli_root/zpool_get/zpool_get.cfg | 1 + .../zpool_import/zpool_import_010_pos.ksh | 15 +- .../cli_root/zpool_initialize/cleanup.ksh | 2 + .../cli_root/zpool_initialize/setup.ksh | 35 + .../zpool_initialize_anyraid_attach.ksh | 56 + ..._initialize_fault_export_import_online.ksh | 41 +- .../zpool_initialize_import_export.ksh | 64 +- ...nitialize_offline_export_import_online.ksh | 47 +- .../zpool_initialize_online_offline.ksh | 60 +- .../zpool_initialize_start_and_cancel_neg.ksh | 32 +- .../zpool_initialize_start_and_cancel_pos.ksh | 21 +- .../zpool_initialize_uninit.ksh | 100 +- .../zpool_initialize_verify_checksums.ksh | 27 +- .../zpool_initialize_verify_initialized.ksh | 53 +- .../functional/fault/auto_spare_001_pos.ksh | 19 +- .../functional/fault/auto_spare_002_pos.ksh | 20 +- 
 .../tests/functional/fault/cleanup.ksh | 2 +
 .../tests/functional/fault/fault.cfg | 2 +-
 .../tests/functional/fault/setup.ksh | 3 +
 .../tests/functional/trim/autotrim_config.ksh | 8 +-
 .../functional/trim/autotrim_integrity.ksh | 4 +-
 .../trim/autotrim_trim_integrity.ksh | 4 +-
 .../tests/functional/trim/cleanup.ksh | 2 +
 .../zfs-tests/tests/functional/trim/setup.ksh | 3 +
 .../tests/functional/trim/trim_config.ksh | 26 +-
 .../tests/functional/trim/trim_integrity.ksh | 4 +-
 95 files changed, 4543 insertions(+), 354 deletions(-)
 create mode 100644 include/sys/vdev_anyraid.h
 create mode 100644 module/zfs/vdev_anyraid.c
 create mode 100755 tests/zfs-tests/tests/functional/anyraid/anyraid_checkpoint.ksh
 create mode 100755 tests/zfs-tests/tests/functional/anyraid/anyraid_clean_mirror_001_pos.ksh
 create mode 100755 tests/zfs-tests/tests/functional/anyraid/anyraid_clean_mirror_002_pos.ksh
 create mode 100755 tests/zfs-tests/tests/functional/anyraid/anyraid_clean_mirror_003_pos.ksh
 create mode 100644 tests/zfs-tests/tests/functional/anyraid/anyraid_common.kshlib
 create mode 100755 tests/zfs-tests/tests/functional/anyraid/anyraid_faildisk_write_replace_resilver.ksh
 create mode 100755 tests/zfs-tests/tests/functional/anyraid/anyraid_offline_write_online_resilver.ksh
 create mode 100755 tests/zfs-tests/tests/functional/anyraid/anyraid_special_vdev_001_pos.ksh
 create mode 100755 tests/zfs-tests/tests/functional/anyraid/anyraid_special_vdev_002_pos.ksh
 create mode 100755 tests/zfs-tests/tests/functional/anyraid/anyraid_tile_layout.ksh
 create mode 100755 tests/zfs-tests/tests/functional/anyraid/cleanup.ksh
 create mode 100644 tests/zfs-tests/tests/functional/anyraid/default.cfg
 create mode 100755 tests/zfs-tests/tests/functional/anyraid/setup.ksh
 create mode 100755 tests/zfs-tests/tests/functional/cli_root/zpool_attach/zpool_attach_002_pos.ksh
 create mode 100755 tests/zfs-tests/tests/functional/cli_root/zpool_attach/zpool_attach_003_pos.ksh
 create mode 100755 tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_anyraid_001_pos.ksh
 create mode 100755 tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_anyraid_002_pos.ksh
 create mode 100755 tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_anyraid_003_pos.ksh
 create mode 100755 tests/zfs-tests/tests/functional/cli_root/zpool_export/zpool_export_anyraid_001_pos.ksh
 create mode 100755 tests/zfs-tests/tests/functional/cli_root/zpool_initialize/setup.ksh
 create mode 100755 tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_anyraid_attach.ksh

diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c
index fa8e7fa691db..aec834ddca89 100644
--- a/cmd/zdb/zdb.c
+++ b/cmd/zdb/zdb.c
@@ -57,6 +57,7 @@
 #include <sys/vdev.h>
 #include <sys/vdev_impl.h>
 #include <sys/metaslab_impl.h>
+#include <sys/vdev_anyraid.h>
 #include <sys/dmu_objset.h>
 #include <sys/dsl_dir.h>
 #include <sys/dsl_dataset.h>
@@ -110,6 +111,7 @@ enum {
 	ARG_ALLOCATED = 256,
 	ARG_BLOCK_BIN_MODE,
 	ARG_BLOCK_CLASSES,
+	ARG_ANYRAID_MAP,
 };
 
 static const char cmdname[] = "zdb";
@@ -742,9 +744,10 @@ usage(void)
 	    "\t\t<poolname> <vdev>:<offset>:<size>[:<flags>]\n"
 	    "\t%s -E [-A] word0:word1:...:word15\n"
 	    "\t%s -S [-AP] [-e [-V] [-p <path> ...]] [-U <cache>] "
-	    "<poolname>\n\n",
+	    "<poolname>\n"
+	    "\t%s --anyraid-map <poolname> [<vdev> ...]\n\n",
 	    cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, cmdname,
-	    cmdname, cmdname, cmdname, cmdname, cmdname);
+	    cmdname, cmdname, cmdname, cmdname, cmdname, cmdname);
 
 	(void) fprintf(stderr, "    Dataset name must include at least one "
 	    "separator character '/' or '@'\n");
@@ -9311,7 +9314,8 @@ zdb_read_block(char *thing, spa_t *spa)
 		if ((zio_checksum_table[ck].ci_flags &
 		    ZCHECKSUM_FLAG_EMBEDDED) ||
-		    ck == ZIO_CHECKSUM_NOPARITY) {
+		    ck == ZIO_CHECKSUM_NOPARITY ||
+		    ck == ZIO_CHECKSUM_ANYRAID_MAP) {
 			continue;
 		}
 		BP_SET_CHECKSUM(bp, ck);
@@ -9432,10 +9436,470 @@ dummy_get_file_info(dmu_object_type_t bonustype, const void *data,
 	abort();
 }
 
+static int
+log_10(uint64_t v)
+{
+	char buf[32];
+	snprintf(buf, sizeof (buf), "%llu", (u_longlong_t)v);
+	return (strlen(buf));
+}
+
+static void
+print_separator_line(int cols, int colwidth, boolean_t *print,
+    boolean_t *final)
+{
+	char buf[64];
+	ASSERT3U(colwidth, <, sizeof (buf) - 2);
+	int len = 0;
+	// Create a buffer with the cell separator to make later code simpler.
+	while (len < colwidth) {
+		len += snprintf(buf + len, sizeof (buf) - len, "─");
+	}
+
+	for (int i = 0; i < cols; i++) {
+		/*
+		 * Skip cells that we don't need to print. If the previous
+		 * cell also wasn't printed, add an extra space for the
+		 * separator column.
+		 */
+		if (!print[i]) {
+			int extra_width = 0;
+			if (i == 0 || !print[i - 1])
+				extra_width++;
+			(void) printf("%*s", colwidth + extra_width, "");
+			continue;
+		}
+
+		// Calculate the right shape for the corner of the cells.
+		const char *left_c, *right_c;
+		if (i == 0 || !print[i - 1]) {
+			left_c = (final[i] && (i == 0 || final[i - 1])) ?
+			    "└" : "├";
+		} else {
+			left_c = "";
+		}
+		if (i == cols - 1 || !print[i + 1]) {
+			right_c =
+			    (final[i] && (i == cols - 1 || final[i + 1])) ?
+			    "┘" : "┤";
+		} else {
+			right_c =
+			    (final[i] && (i == cols - 1 || final[i + 1])) ?
+			    "┴" : "┼";
+		}
+		(void) printf("%s%s%s", left_c, buf, right_c);
+	}
+	(void) printf("\n");
+}
+
+static void
+zdb_print_anyraid_tile_layout(vdev_t *vd)
+{
+	ASSERT3P(vd->vdev_ops, ==, &vdev_anyraid_ops);
+	vdev_anyraid_t *var = vd->vdev_tsd;
+	int cols = vd->vdev_children;
+	int textwidth = MAX(8, log_10(avl_numnodes(&var->vd_tile_map)) +
+	    (var->vd_nparity > 0 ? log_10(var->vd_nparity + 1) + 1 : 0));
+	int colwidth = textwidth + 2;
+
+	// Create and populate table with all the values we need to print.
+	char ***table = malloc(sizeof (*table) * cols);
+	for (int i = 0; i < cols; i++) {
+		table[i] = calloc(var->vd_children[i]->van_capacity,
+		    sizeof (**table));
+	}
+
+	anyraid_tile_t *cur = avl_first(&var->vd_tile_map);
+	while (cur) {
+		int p = 0;
+		for (anyraid_tile_node_t *node = list_head(&cur->at_list);
+		    node; node = list_next(&cur->at_list, node)) {
+			ASSERT3U(p, <=, var->vd_nparity + 1);
+			char **next =
+			    &(table[node->atn_disk][node->atn_offset]);
+			*next = malloc(textwidth + 1);
+			int len = snprintf(*next, textwidth, "%d",
+			    cur->at_tile_id);
+			if (var->vd_nparity > 0) {
+				(void) snprintf((*next) + len, textwidth - len,
+				    "-%d", p);
+			}
+			p++;
+		}
+		ASSERT3U(p, ==, var->vd_nparity + 1);
+		cur = AVL_NEXT(&var->vd_tile_map, cur);
+	}
+
+	// These are needed to generate the separator lines
+	boolean_t *printed = malloc(sizeof (*printed) * cols);
+	boolean_t *final = malloc(sizeof (*final) * cols);
+	// Print the header row
+	for (int i = 0; i < cols; i++) {
+		if (i == 0)
+			(void) printf("│");
+		(void) printf(" %*d ", textwidth, i);
+		(void) printf("│");
+		printed[i] = B_TRUE;
+		final[i] = B_FALSE;
+	}
+	(void) printf("\n");
+	print_separator_line(cols, colwidth, printed, final);
+
+	// Print out the actual tile map, one row at a time.
+ for (int i = 0; ; i++) { + int last_printed = INT_MAX; + for (int v = 0; v < cols; v++) { + if (final[v]) { + ASSERT3U(i, >=, + var->vd_children[v]->van_capacity); + int extra_width = 0; + if (v == 0 || !printed[v - 1]) + extra_width++; + (void) printf("%*s", + colwidth + extra_width, ""); + printed[v] = B_FALSE; + continue; + } + if (i + 1 == var->vd_children[v]->van_capacity) + final[v] = B_TRUE; + if (v - 1 != last_printed) + (void) printf("│"); + char *value = table[v][i]; + (void) printf(" %*s │", textwidth, value ? value : + ""); + last_printed = v; + } + + if (last_printed == INT_MAX) + break; + (void) printf("\n"); + print_separator_line(cols, colwidth, printed, final); + } + (void) printf("\n"); + for (int i = 0; i < cols; i++) { + for (int j = 0; j < var->vd_children[i]->van_capacity; j++) + if (table[i][j]) + free(table[i][j]); + free(table[i]); + } + free(table); +} + +static void +free_header(anyraid_header_t *header, uint64_t header_size) { + fnvlist_free(header->ah_nvl); + abd_return_buf(header->ah_abd, header->ah_buf, header_size); + abd_free(header->ah_abd); +} + +/* + * Print one of the anyraid maps from the given vdev child. This prints the + * mapping entries themselves, rather than the kernel's interpretation of them, + * which can be useful for debugging. + */ +static void +print_anyraid_mapping(vdev_t *vd, int child, int mapping, + anyraid_header_t *header) +{ + vdev_anyraid_t *var = vd->vdev_tsd; + vdev_t *cvd = vd->vdev_child[child]; + uint64_t ashift = cvd->vdev_ashift; + spa_t *spa = vd->vdev_spa; + int error = 0; + int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL | + ZIO_FLAG_SPECULATIVE; + + uint64_t header_offset = VDEV_LABEL_START_SIZE + + mapping * VDEV_ANYRAID_SINGLE_MAP_SIZE(ashift); + uint64_t header_size = VDEV_ANYRAID_MAP_HEADER_SIZE(ashift); + uint64_t map_offset = header_offset + header_size; + + nvlist_t *hnvl = header->ah_nvl; + // Look up and print map metadata. 
+ uint16_t version; + if (nvlist_lookup_uint16(hnvl, VDEV_ANYRAID_HEADER_VERSION, + &version) != 0) { + (void) printf("No version\n"); + free_header(header, header_size); + return; + } + + uint64_t tile_size; + if (nvlist_lookup_uint64(hnvl, VDEV_ANYRAID_HEADER_TILE_SIZE, + &tile_size) != 0) { + (void) printf("No tile size\n"); + free_header(header, header_size); + return; + } + + uint32_t map_length; + if (nvlist_lookup_uint32(hnvl, VDEV_ANYRAID_HEADER_LENGTH, + &map_length) != 0) { + (void) printf("No map length\n"); + free_header(header, header_size); + return; + } + + uint64_t written_txg = 0; + if (nvlist_lookup_uint64(hnvl, VDEV_ANYRAID_HEADER_TXG, + &written_txg) != 0) + (void) printf("No valid TXG\n"); + + uint8_t disk_id = 0; + if (nvlist_lookup_uint8(hnvl, VDEV_ANYRAID_HEADER_DISK, + &disk_id) != 0) + (void) printf("No valid disk ID\n"); + + (void) printf("version: %6d\ttile size: %8lx\ttxg: %lu\n", + version, tile_size, written_txg); + (void) printf("map length: %6u\tdisk id: %3u\n", map_length, disk_id); + + // Read in and print the actual mapping data + zio_t *rio = zio_root(spa, NULL, NULL, flags); + abd_t *map_abds[VDEV_ANYRAID_MAP_COPIES] = {0}; + int i; + for (i = 0; i <= (map_length / SPA_MAXBLOCKSIZE); i++) { + zio_eck_t *cksum = (zio_eck_t *) + &header->ah_buf[VDEV_ANYRAID_NVL_BYTES(ashift) + + i * sizeof (*cksum)]; + map_abds[i] = abd_alloc_linear(SPA_MAXBLOCKSIZE, B_TRUE); + zio_nowait(zio_read_phys(rio, cvd, map_offset + + i * SPA_MAXBLOCKSIZE, SPA_MAXBLOCKSIZE, map_abds[i], + ZIO_CHECKSUM_ANYRAID_MAP, NULL, cksum, + ZIO_PRIORITY_SYNC_READ, flags, B_FALSE)); + } + i--; + if ((error = zio_wait(rio))) { + (void) printf("Could not read map: %s\n", strerror(error)); + for (; i >= 0; i--) + abd_free(map_abds[i]); + free_header(header, header_size); + return; + } + free_header(header, header_size); + + uint32_t map = -1, cur_tile = 0; + /* + * For now, all entries are the size of a uint32_t. If that + * ever changes, we need better logic here. 
+ */ + uint32_t size = sizeof (uint32_t); + uint8_t *map_buf = NULL; + uint8_t par_cnt = 0; + for (uint32_t off = 0; off < map_length; off += size) { + int next_map = off / SPA_MAXBLOCKSIZE; + if (map != next_map) { + // switch maps + if (map != -1) { + abd_return_buf(map_abds[map], map_buf, + SPA_MAXBLOCKSIZE); + } + map_buf = abd_borrow_buf(map_abds[next_map], + SPA_MAXBLOCKSIZE); + map = next_map; + } + uint32_t mo = off % SPA_MAXBLOCKSIZE; + anyraid_map_entry_t *entry = + (anyraid_map_entry_t *)(map_buf + mo); + uint8_t type = entry->ame_u.ame_amle.amle_type; + uint8_t *buf; + boolean_t allocated = B_FALSE; + if (size > SPA_MAXBLOCKSIZE - mo) { + buf = kmem_alloc(size, KM_SLEEP); + uint8_t rem = SPA_MAXBLOCKSIZE - mo; + allocated = B_TRUE; + memcpy(buf, map_buf + mo, rem); + // switch maps + if (map != -1) { + abd_return_buf(map_abds[map], map_buf, + SPA_MAXBLOCKSIZE); + } + map_buf = abd_borrow_buf(map_abds[next_map], + SPA_MAXBLOCKSIZE); + map = next_map; + memcpy(buf + rem, map_buf, size - rem); + } else { + buf = map_buf + mo; + } + entry = (anyraid_map_entry_t *)buf; + switch (type) { + case AMET_SKIP: { + anyraid_map_skip_entry_t *amse = + &entry->ame_u.ame_amse; + ASSERT0(par_cnt); + cur_tile += amse_get_skip_count(amse); + (void) printf("skip %u\n", + amse_get_skip_count(amse)); + break; + } + case AMET_LOC: { + anyraid_map_loc_entry_t *amle = + &entry->ame_u.ame_amle; + if (par_cnt == 0) { + (void) printf("loc %u:", cur_tile); + cur_tile++; + } + (void) printf("\td%u o%u,", amle->amle_disk, + amle->amle_offset); + par_cnt = (par_cnt + 1) % (var->vd_nparity + 1); + if (par_cnt == 0) + (void) printf("\n"); + break; + } + default: + (void) printf("Invalid entry type %d, " + "aborting\n", type); + break; + } + if (allocated) + kmem_free(buf, size); + } + if (map_buf) + abd_return_buf(map_abds[map], map_buf, SPA_MAXBLOCKSIZE); + + var->vd_tile_size = tile_size; + + for (; i >= 0; i--) + abd_free(map_abds[i]); + + return; + +} + +/* + * Print the anyraid maps on disk. With verbosity == 2, we use the normal + * mapping-selection logic that we use during import; with higher verbosity, we + * print them all. 
+ */ +static void +zdb_print_anyraid_ondisk_maps(vdev_t *vd, int verbosity) +{ + int child = 0; + spa_config_enter(spa, SCL_ZIO, FTAG, RW_READER); + if (verbosity == 2) { + anyraid_header_t header; + int mapping; + uint64_t txg; + int error = vdev_anyraid_pick_best_mapping( + vd->vdev_child[child], &txg, &header, &mapping); + if (error != 0) { + (void) printf("Could not print mapping: %s\n", + strerror(error)); + spa_config_exit(spa, SCL_ZIO, FTAG); + return; + } + (void) printf("anyraid map %d:\n", mapping); + print_anyraid_mapping(vd, child, mapping, &header); + } else if (verbosity == 3) { + for (int i = 0; i < VDEV_ANYRAID_MAP_COPIES; i++) { + (void) printf("anyraid map %d:\n", i); + anyraid_header_t header; + int error = vdev_anyraid_open_header( + vd->vdev_child[child], i, &header); + if (error != 0) { + (void) printf("Could not print mapping: %s\n", + strerror(error)); + spa_config_exit(spa, SCL_ZIO, FTAG); + return; + } + print_anyraid_mapping(vd, child, i, &header); + } + } else { + for (; child < vd->vdev_children; child++) { + for (int i = 0; i < VDEV_ANYRAID_MAP_COPIES; i++) { + (void) printf("anyraid map %d %d:\n", child, i); + anyraid_header_t header; + int error = vdev_anyraid_open_header( + vd->vdev_child[child], i, &header); + if (error != 0) { + (void) printf("Could not print " + "mapping: %s\n", strerror(error)); + continue; + } + print_anyraid_mapping(vd, child, i, &header); + } + } + + } + spa_config_exit(spa, SCL_ZIO, FTAG); +} + +/* + * Print the loaded version of the map for the provided anyraid vdev. + */ +static void +zdb_dump_anyraid_map_vdev(vdev_t *vd, int verbosity) +{ + ASSERT3P(vd->vdev_ops, ==, &vdev_anyraid_ops); + vdev_anyraid_t *var = vd->vdev_tsd; + + (void) printf("\t%-5s%11llu %s %16llx\n", + "vdev", (u_longlong_t)vd->vdev_id, + "tile_size", (u_longlong_t)var->vd_tile_size); + (void) printf("\t%-8s%8llu %-12s %10u\n", "tiles", + (u_longlong_t)avl_numnodes(&var->vd_tile_map), + "checkpoint tile", var->vd_checkpoint_tile); + (void) printf("\t%16s %12s %13s\n", "----------------", + "------------", "-------------"); + + anyraid_tile_t *cur = avl_first(&var->vd_tile_map); + anyraid_tile_node_t *curn = cur != NULL ? + list_head(&cur->at_list) : NULL; + while (cur) { + (void) printf("\t%-8s%8llu %-8s%04llx %-11s%02llx\n", + "tile", (u_longlong_t)cur->at_tile_id, + "offset", (u_longlong_t)curn->atn_offset, + "disk", (u_longlong_t)curn->atn_disk); + curn = list_next(&cur->at_list, curn); + if (curn == NULL) { + cur = AVL_NEXT(&var->vd_tile_map, cur); + curn = cur != NULL ? list_head(&cur->at_list) : NULL; + } + } + + (void) printf("\n"); + if (verbosity > 0) + zdb_print_anyraid_tile_layout(vd); + + if (verbosity > 1) + zdb_print_anyraid_ondisk_maps(vd, verbosity); +} + +static int +zdb_dump_anyraid_map(char *vdev_str, spa_t *spa, int verbosity) +{ + vdev_t *rvd, *vd; + + (void) printf("\nAnyRAID tiles:\n"); + + /* A specific vdev. */ + if (vdev_str != NULL) { + vd = zdb_vdev_lookup(spa->spa_root_vdev, vdev_str); + if (vd == NULL) { + (void) printf("Invalid vdev: %s\n", vdev_str); + return (EINVAL); + } + if (vd->vdev_ops != &vdev_anyraid_ops) { + (void) printf("Not an anyraid vdev: %s\n", vdev_str); + return (EINVAL); + } + zdb_dump_anyraid_map_vdev(vd, verbosity); + return (0); + } + + /* All anyraid vdevs. 
+	rvd = spa->spa_root_vdev;
+	for (uint64_t c = 0; c < rvd->vdev_children; c++) {
+		vd = rvd->vdev_child[c];
+		if (vd->vdev_ops == &vdev_anyraid_ops)
+			zdb_dump_anyraid_map_vdev(vd, verbosity);
+	}
+	return (0);
+}
+
 int
 main(int argc, char **argv)
 {
-	int c;
+	int c, long_index;
+	boolean_t opt_anyraid_map = B_FALSE;
 	int dump_all = 1;
 	int verbose = 0;
 	int error = 0;
@@ -9539,12 +10003,14 @@ main(int argc, char **argv)
 		    ARG_BLOCK_BIN_MODE},
 		{"class", required_argument, NULL,
 		    ARG_BLOCK_CLASSES},
+		{"anyraid-map", no_argument, NULL,
+		    ARG_ANYRAID_MAP},
 		{0, 0, 0, 0}
 	};
 
 	while ((c = getopt_long(argc, argv,
 	    "AbBcCdDeEFGhiI:kK:lLmMNo:Op:PqrRsSt:TuU:vVx:XYyZ",
-	    long_options, NULL)) != -1) {
+	    long_options, &long_index)) != -1) {
 		switch (c) {
 		case 'b':
 		case 'B':
@@ -9705,6 +10171,10 @@ main(int argc, char **argv)
 			free(buf);
 			break;
 		}
+		case ARG_ANYRAID_MAP:
+			opt_anyraid_map = B_TRUE;
+			dump_all = 0;
+			break;
 		default:
 			usage();
 			break;
@@ -10115,6 +10585,16 @@ main(int argc, char **argv)
 			argc--;
 			if (dump_opt['r']) {
 				error = zdb_copy_object(os, object, argv[1]);
+			} else if (opt_anyraid_map) {
+				if (argc == 0)
+					error = zdb_dump_anyraid_map(NULL,
+					    spa, verbose);
+				else
+					for (int i = 0; i < argc; i++) {
+						error = zdb_dump_anyraid_map(
+						    argv[i], spa, verbose);
+						if (error != 0)
+							break;
+					}
 			} else if (!dump_opt['R']) {
 				flagbits['d'] = ZOR_FLAG_DIRECTORY;
 				flagbits['f'] = ZOR_FLAG_PLAIN_FILE;
diff --git a/cmd/zpool/zpool_vdev.c b/cmd/zpool/zpool_vdev.c
index 222b5524669e..8ab6f8dff069 100644
--- a/cmd/zpool/zpool_vdev.c
+++ b/cmd/zpool/zpool_vdev.c
@@ -78,6 +78,7 @@
 #include "zpool_util.h"
 #include <sys/zfs_context.h>
+#include <sys/vdev_anyraid.h>
 
 /*
  * For any given vdev specification, we can have multiple errors. The
@@ -431,7 +432,8 @@ is_raidz_mirror(replication_level_t *a, replication_level_t *b,
 {
 	if ((strcmp(a->zprl_type, "raidz") == 0 ||
 	    strcmp(a->zprl_type, "draid") == 0) &&
-	    strcmp(b->zprl_type, "mirror") == 0) {
+	    (strcmp(b->zprl_type, "mirror") == 0 ||
+	    strcmp(b->zprl_type, "anyraid") == 0)) {
 		*raidz = a;
 		*mirror = b;
 		return (B_TRUE);
@@ -541,6 +543,7 @@ get_replication(nvlist_t *nvroot, boolean_t fatal)
 			 * already reported an error for this spec, so don't
 			 * bother doing it again.
 			 */
+			const char *orig_type = type;
 			type = NULL;
 			dontreport = 0;
 			vdev_size = -1LL;
@@ -646,7 +649,8 @@ get_replication(nvlist_t *nvroot, boolean_t fatal)
 				if (!dontreport &&
 				    (vdev_size != -1LL &&
 				    (llabs(size - vdev_size) >
-				    ZPOOL_FUZZ))) {
+				    ZPOOL_FUZZ)) && strcmp(orig_type,
+				    VDEV_TYPE_ANYRAID) != 0) {
 					if (ret != NULL)
 						free(ret);
 					ret = NULL;
@@ -726,19 +730,6 @@ get_replication(nvlist_t *nvroot, boolean_t fatal)
 				else
 					return (NULL);
 			}
-		} else if (strcmp(lastrep.zprl_type, rep.zprl_type) !=
-		    0) {
-			if (ret != NULL)
-				free(ret);
-			ret = NULL;
-			if (fatal)
-				vdev_error(gettext(
-				    "mismatched replication level: "
-				    "both %s and %s vdevs are "
-				    "present\n"),
-				    lastrep.zprl_type, rep.zprl_type);
-			else
-				return (NULL);
 		} else if (lastrep.zprl_parity != rep.zprl_parity) {
 			if (ret)
 				free(ret);
@@ -1200,7 +1191,7 @@ is_device_in_use(nvlist_t *config, nvlist_t *nv, boolean_t force,
 }
 
 /*
- * Returns the parity level extracted from a raidz or draid type.
+ * Returns the parity level extracted from a raidz, anyraid, or draid type.
 * If the parity cannot be determined zero is returned.
 */
 static int
@@ -1228,6 +1219,22 @@ get_parity(const char *type)
 				return (0);
 			}
 		}
+	} else if (strncmp(type, VDEV_TYPE_ANYRAID,
+	    strlen(VDEV_TYPE_ANYRAID)) == 0) {
+		p = type + strlen(VDEV_TYPE_ANYRAID);
+
+		if (*p == '\0') {
+			/* when unspecified default to 1-parity mirror */
+			return (1);
+		} else {
+			char *end;
+			errno = 0;
+			parity = strtol(p, &end, 10);
+			if (errno != 0 || *end != '\0' ||
+			    parity < 0 || parity > VDEV_ANYRAID_MAXPARITY) {
+				return (0);
+			}
+		}
 	} else if (strncmp(type, VDEV_TYPE_DRAID,
 	    strlen(VDEV_TYPE_DRAID)) == 0) {
 		p = type + strlen(VDEV_TYPE_DRAID);
@@ -1285,6 +1292,15 @@ is_grouping(const char *type, int *mindev, int *maxdev)
 	if (maxdev != NULL)
 		*maxdev = INT_MAX;
 
+	if (strncmp(type, VDEV_TYPE_ANYRAID, strlen(VDEV_TYPE_ANYRAID)) == 0) {
+		nparity = get_parity(type);
+		if (mindev != NULL)
+			*mindev = nparity + 1;
+		if (maxdev != NULL)
+			*maxdev = 255;
+		return (VDEV_TYPE_ANYRAID);
+	}
+
 	if (strcmp(type, "mirror") == 0) {
 		if (mindev != NULL)
 			*mindev = 2;
@@ -1319,6 +1335,22 @@ is_grouping(const char *type, int *mindev, int *maxdev)
 	return (NULL);
 }
 
+static int
+anyraid_config_by_type(nvlist_t *nv, const char *type)
+{
+	uint64_t nparity = 0;
+
+	if (strncmp(type, VDEV_TYPE_ANYRAID, strlen(VDEV_TYPE_ANYRAID)) != 0)
+		return (EINVAL);
+
+	nparity = (uint64_t)get_parity(type);
+
+	fnvlist_add_uint8(nv, ZPOOL_CONFIG_ANYRAID_PARITY_TYPE, VAP_MIRROR);
+	fnvlist_add_uint64(nv, ZPOOL_CONFIG_NPARITY, nparity);
+
+	return (0);
+}
+
 /*
  * Extract the configuration parameters encoded in the dRAID type and
  * use them to generate a dRAID configuration. The expected format is:
@@ -1527,9 +1559,9 @@ construct_spec(nvlist_t *props, int argc, char **argv)
 		nv = NULL;
 
 		/*
-		 * If it's a mirror, raidz, or draid the subsequent arguments
-		 * are its leaves -- until we encounter the next mirror,
-		 * raidz or draid.
+		 * If it's a mirror, raidz, anyraid, or draid the subsequent
+		 * arguments are its leaves -- until we encounter the next
+		 * mirror, raidz, anyraid, or draid.
 		 */
 		if ((type = is_grouping(fulltype, &mindev, &maxdev)) != NULL) {
 			nvlist_t **child = NULL;
@@ -1596,7 +1628,12 @@ construct_spec(nvlist_t *props, int argc, char **argv)
 			}
 
 			if (is_log) {
-				if (strcmp(type, VDEV_TYPE_MIRROR) != 0) {
+				/*
+				 * TODO: only AnyRAID mirror is expected to be
+				 * allowed.
+				 */
+				if (strcmp(type, VDEV_TYPE_MIRROR) != 0 &&
+				    strcmp(type, VDEV_TYPE_ANYRAID) != 0) {
 					(void) fprintf(stderr,
 					    gettext("invalid vdev "
 					    "specification: unsupported 'log' "
@@ -1690,6 +1727,15 @@ construct_spec(nvlist_t *props, int argc, char **argv)
 				    ZPOOL_CONFIG_NPARITY, mindev - 1) == 0);
 			}
 
+			if (strcmp(type, VDEV_TYPE_ANYRAID) == 0) {
+				if (anyraid_config_by_type(nv, fulltype)
+				    != 0) {
+					for (c = 0; c < children; c++)
+						nvlist_free(child[c]);
+					free(child);
+					goto spec_out;
+				}
+			}
 			if (strcmp(type, VDEV_TYPE_DRAID) == 0) {
 				if (draid_config_by_type(nv, fulltype,
 				    children) != 0) {
diff --git a/cmd/ztest.c b/cmd/ztest.c
index 89b1f68606ea..9a6ba6072954 100644
--- a/cmd/ztest.c
+++ b/cmd/ztest.c
@@ -106,6 +106,7 @@
 #include <sys/vdev_draid.h>
 #include <sys/vdev_impl.h>
 #include <sys/vdev_file.h>
+#include <sys/vdev_anyraid.h>
 #include <sys/vdev_initialize.h>
 #include <sys/vdev_raidz.h>
 #include <sys/vdev_trim.h>
@@ -278,6 +279,7 @@ extern uint64_t raidz_expand_max_reflow_bytes;
 extern uint_t raidz_expand_pause_point;
 extern boolean_t ddt_prune_artificial_age;
 extern boolean_t ddt_dump_prune_histogram;
+extern uint64_t zfs_anyraid_min_tile_size;
 
 static ztest_shared_opts_t *ztest_shared_opts;
@@ -673,10 +675,12 @@ fatal(int do_perror, const char *message, ...)
fatal_msg = buf; /* to ease debugging */ out: - if (ztest_dump_core) + if (ztest_dump_core) { abort(); - else + } else { + // NOTE: Not safe if we've called kernel_fini already dump_debug_buffer(); + } exit(3); } @@ -769,7 +773,7 @@ static ztest_option_t option_table[] = { DEFAULT_RAID_CHILDREN, NULL}, { 'R', "raid-parity", "INTEGER", "Raid parity", DEFAULT_RAID_PARITY, NULL}, - { 'K', "raid-kind", "raidz|eraidz|draid|random", "Raid kind", + { 'K', "raid-kind", "raidz|eraidz|draid|anyraid|random", "Raid kind", NO_DEFAULT, "random"}, { 'D', "draid-data", "INTEGER", "Number of draid data drives", DEFAULT_DRAID_DATA, NULL}, @@ -1119,7 +1123,7 @@ process_options(int argc, char **argv) } if (strcmp(raid_kind, "random") == 0) { - switch (ztest_random(3)) { + switch (ztest_random(4)) { case 0: raid_kind = "raidz"; break; @@ -1129,6 +1133,9 @@ process_options(int argc, char **argv) case 2: raid_kind = "draid"; break; + case 3: + raid_kind = "anyraid"; + break; } if (ztest_opts.zo_verbose >= 3) @@ -1180,11 +1187,25 @@ process_options(int argc, char **argv) zo->zo_raid_parity = MIN(zo->zo_raid_parity, zo->zo_raid_children - 1); - } else /* using raidz */ { - ASSERT0(strcmp(raid_kind, "raidz")); + } else if (strcmp(raid_kind, "raidz") == 0) { + zo->zo_raid_parity = MIN(zo->zo_raid_parity, + zo->zo_raid_children - 1); + } else if (strcmp(raid_kind, "anyraid") == 0) { + uint64_t min_devsize; + + /* With fewer disks use 1G, otherwise 512M is OK */ + min_devsize = (ztest_opts.zo_raid_children < 16) ? + (1ULL << 30) : (512ULL << 20); + if (zo->zo_vdev_size < min_devsize) + zo->zo_vdev_size = min_devsize; zo->zo_raid_parity = MIN(zo->zo_raid_parity, zo->zo_raid_children - 1); + + (void) strlcpy(zo->zo_raid_type, VDEV_TYPE_ANYRAID, + sizeof (zo->zo_raid_type)); + } else { + fatal(B_FALSE, "invalid raid kind %s", raid_kind); } zo->zo_vdevtime = @@ -1375,6 +1396,9 @@ make_vdev_raid(const char *path, const char *aux, const char *pool, size_t size, fnvlist_add_uint64(raid, ZPOOL_CONFIG_DRAID_NDATA, ndata); fnvlist_add_uint64(raid, ZPOOL_CONFIG_DRAID_NSPARES, nspares); fnvlist_add_uint64(raid, ZPOOL_CONFIG_DRAID_NGROUPS, ngroups); + } else if (strcmp(ztest_opts.zo_raid_type, VDEV_TYPE_ANYRAID) == 0) { + fnvlist_add_uint8(raid, ZPOOL_CONFIG_ANYRAID_PARITY_TYPE, + VAP_MIRROR); } for (c = 0; c < r; c++) @@ -3166,7 +3190,8 @@ ztest_spa_upgrade(ztest_ds_t *zd, uint64_t id) return; /* dRAID added after feature flags, skip upgrade test. */ - if (strcmp(ztest_opts.zo_raid_type, VDEV_TYPE_DRAID) == 0) + if (strcmp(ztest_opts.zo_raid_type, VDEV_TYPE_DRAID) == 0 || + strcmp(ztest_opts.zo_raid_type, VDEV_TYPE_ANYRAID) == 0) return; mutex_enter(&ztest_vdev_lock); @@ -3790,28 +3815,47 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id) if (ztest_opts.zo_raid_children > 1) { if (strcmp(oldvd->vdev_ops->vdev_op_type, "raidz") == 0) ASSERT3P(oldvd->vdev_ops, ==, &vdev_raidz_ops); + else if (strcmp(oldvd->vdev_ops->vdev_op_type, "anyraid") == 0) + ASSERT3P(oldvd->vdev_ops, ==, &vdev_anyraid_ops); else ASSERT3P(oldvd->vdev_ops, ==, &vdev_draid_ops); oldvd = oldvd->vdev_child[leaf % raidz_children]; } + if (!replacing && oldvd->vdev_parent->vdev_ops == &vdev_anyraid_ops) { + oldvd = oldvd->vdev_parent; + } + /* * If we're already doing an attach or replace, oldvd may be a - * mirror vdev -- in which case, pick a random child. + * mirror vdev -- in which case, pick a random child. For anyraid vdevs, + * attachment occurs at the parent level. 
*/ - while (oldvd->vdev_children != 0) { + while (oldvd->vdev_children != 0 && oldvd->vdev_ops != + &vdev_anyraid_ops) { oldvd_has_siblings = B_TRUE; ASSERT3U(oldvd->vdev_children, >=, 2); oldvd = oldvd->vdev_child[ztest_random(oldvd->vdev_children)]; } oldguid = oldvd->vdev_guid; - oldsize = vdev_get_min_asize(oldvd); + if (oldvd->vdev_ops != &vdev_anyraid_ops) + oldsize = vdev_get_min_asize(oldvd); + else + oldsize = oldvd->vdev_child[ + ztest_random(oldvd->vdev_children)]->vdev_asize; oldvd_is_log = oldvd->vdev_top->vdev_islog; oldvd_is_special = oldvd->vdev_top->vdev_alloc_bias == VDEV_BIAS_SPECIAL || oldvd->vdev_top->vdev_alloc_bias == VDEV_BIAS_DEDUP; - (void) strlcpy(oldpath, oldvd->vdev_path, MAXPATHLEN); + if (oldvd->vdev_path == NULL) { + ASSERT3P(oldvd->vdev_ops, ==, &vdev_anyraid_ops); + snprintf(oldpath, MAXPATHLEN, "%s-%llu", + oldvd->vdev_ops->vdev_op_type, + (u_longlong_t)oldvd->vdev_id); + } else { + (void) strlcpy(oldpath, oldvd->vdev_path, MAXPATHLEN); + } pvd = oldvd->vdev_parent; pguid = pvd->vdev_guid; @@ -3820,7 +3864,8 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id) * to the detach the pool is scrubbed in order to prevent creating * unrepairable blocks as a result of the data corruption injection. */ - if (oldvd_has_siblings && ztest_random(2) == 0) { + if (oldvd_has_siblings && oldvd->vdev_ops != &vdev_anyraid_ops && + ztest_random(2) == 0) { spa_config_exit(spa, SCL_ALL, FTAG); error = ztest_scrub_impl(spa); @@ -3884,7 +3929,9 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id) * If newvd is a distributed spare and it's being attached to a * dRAID which is not its parent it should fail with ENOTSUP. */ - if (pvd->vdev_ops != &vdev_mirror_ops && + if (oldvd->vdev_ops == &vdev_anyraid_ops) + expected_error = 0; + else if (pvd->vdev_ops != &vdev_mirror_ops && pvd->vdev_ops != &vdev_root_ops && (!replacing || pvd->vdev_ops == &vdev_replacing_ops || pvd->vdev_ops == &vdev_spare_ops)) @@ -3896,7 +3943,9 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id) expected_error = replacing ? 0 : EBUSY; else if (vdev_lookup_by_path(rvd, newpath) != NULL) expected_error = EBUSY; - else if (!newvd_is_dspare && newsize < oldsize) + else if (newsize < oldsize && !(newvd_is_dspare || + (pvd->vdev_ops == &vdev_anyraid_ops && + newsize < pvd->vdev_ops->vdev_op_min_asize(pvd, oldvd)))) expected_error = EOVERFLOW; else if (ashift > oldvd->vdev_top->vdev_ashift) expected_error = EDOM; @@ -3917,8 +3966,9 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id) * When supported select either a healing or sequential resilver. 
*/ boolean_t rebuilding = B_FALSE; - if (pvd->vdev_ops == &vdev_mirror_ops || - pvd->vdev_ops == &vdev_root_ops) { + if (oldvd->vdev_ops != &vdev_anyraid_ops && + (pvd->vdev_ops == &vdev_mirror_ops || + pvd->vdev_ops == &vdev_root_ops)) { rebuilding = !!ztest_random(2); } @@ -8999,6 +9049,9 @@ main(int argc, char **argv) metaslab_df_alloc_threshold = zs->zs_metaslab_df_alloc_threshold; + zfs_anyraid_min_tile_size = MIN(zfs_anyraid_min_tile_size, + ztest_opts.zo_vdev_size / 8); + if (zs->zs_do_init) ztest_run_init(); else diff --git a/include/Makefile.am b/include/Makefile.am index 3312dd28c1c5..8b74413ced77 100644 --- a/include/Makefile.am +++ b/include/Makefile.am @@ -100,6 +100,7 @@ COMMON_H = \ sys/unique.h \ sys/uuid.h \ sys/vdev.h \ + sys/vdev_anyraid.h \ sys/vdev_disk.h \ sys/vdev_draid.h \ sys/vdev_file.h \ diff --git a/include/os/linux/kernel/linux/mod_compat.h b/include/os/linux/kernel/linux/mod_compat.h index e49ada399694..ac320869cdc2 100644 --- a/include/os/linux/kernel/linux/mod_compat.h +++ b/include/os/linux/kernel/linux/mod_compat.h @@ -38,6 +38,7 @@ typedef const struct kernel_param zfs_kernel_param_t; enum scope_prefix_types { zfs, + zfs_anyraid, zfs_arc, zfs_brt, zfs_condense, diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index 830c8455bb1a..1d1cd5ec194e 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -907,10 +907,14 @@ typedef struct zpool_load_policy { #define ZPOOL_CONFIG_DRAID_NSPARES "draid_nspares" #define ZPOOL_CONFIG_DRAID_NGROUPS "draid_ngroups" +/* ANYRAID configuration */ +#define ZPOOL_CONFIG_ANYRAID_PARITY_TYPE "parity_type" + #define VDEV_TYPE_ROOT "root" #define VDEV_TYPE_MIRROR "mirror" #define VDEV_TYPE_REPLACING "replacing" #define VDEV_TYPE_RAIDZ "raidz" +#define VDEV_TYPE_ANYRAID "anyraid" #define VDEV_TYPE_DRAID "draid" #define VDEV_TYPE_DRAID_SPARE "dspare" #define VDEV_TYPE_DISK "disk" @@ -922,6 +926,8 @@ typedef struct zpool_load_policy { #define VDEV_TYPE_L2CACHE "l2cache" #define VDEV_TYPE_INDIRECT "indirect" +#define VDEV_ANYRAID_MAXPARITY 3 + #define VDEV_RAIDZ_MAXPARITY 3 #define VDEV_DRAID_MAXPARITY 3 diff --git a/include/sys/vdev.h b/include/sys/vdev.h index 7a5d4f6a5e6f..c4a4388bfccb 100644 --- a/include/sys/vdev.h +++ b/include/sys/vdev.h @@ -192,9 +192,9 @@ extern uint64_t vdev_queue_class_length(vdev_t *vq, zio_priority_t p); extern boolean_t vdev_queue_pool_busy(spa_t *spa); typedef enum vdev_config_sync_status { - VDEV_CONFIG_NORMAL, + VDEV_CONFIG_KEEP_CHECKPOINT, VDEV_CONFIG_CREATING_CHECKPOINT, - VDEV_CONFIG_DISCARDING_CHECKPOINT, + VDEV_CONFIG_NO_CHECKPOINT, VDEV_CONFIG_REWINDING_CHECKPOINT } vdev_config_sync_status_t; diff --git a/include/sys/vdev_anyraid.h b/include/sys/vdev_anyraid.h new file mode 100644 index 000000000000..9276efe03fa9 --- /dev/null +++ b/include/sys/vdev_anyraid.h @@ -0,0 +1,270 @@ +// SPDX-License-Identifier: CDDL-1.0 +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or https://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2025, Klara Inc.
+ */
+
+#ifndef _SYS_VDEV_ANYRAID_H
+#define _SYS_VDEV_ANYRAID_H
+
+#include 
+#include 
+#include 
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum vdev_anyraid_parity_type {
+	VAP_MIRROR, // includes raid0, i.e. a 0-parity mirror
+	VAP_TYPES,
+} vdev_anyraid_parity_type_t;
+
+typedef struct vdev_anyraid_node {
+	avl_node_t van_node;
+	uint8_t van_id;
+	uint16_t van_next_offset;
+	uint16_t van_capacity;
+} vdev_anyraid_node_t;
+
+typedef struct vdev_anyraid {
+	vdev_anyraid_parity_type_t vd_parity_type;
+	/*
+	 * The parity of the vdev; 0 for raid0, or the number of extra
+	 * mirror copies.
+	 */
+	uint_t vd_nparity;
+	uint64_t vd_tile_size;
+
+	krwlock_t vd_lock;
+	avl_tree_t vd_tile_map;
+	avl_tree_t vd_children_tree;
+	uint32_t vd_checkpoint_tile;
+	vdev_anyraid_node_t **vd_children;
+} vdev_anyraid_t;
+
+typedef struct anyraid_tile_node {
+	list_node_t atn_node;
+	uint8_t atn_disk;
+	uint16_t atn_offset;
+} anyraid_tile_node_t;
+
+typedef struct anyraid_tile {
+	avl_node_t at_node;
+	uint32_t at_tile_id;
+	list_t at_list;
+} anyraid_tile_t;
+
+/*
+ * The ondisk structure of the anyraid tile map is VDEV_ANYRAID_MAP_COPIES
+ * copies of the following layout. We store the tile map on every disk, and
+ * each TXG we update a different copy (txg % VDEV_ANYRAID_MAP_COPIES).
+ *
+ * First, we start with a MAX(8KiB, 1 << ashift) block that stores a packed
+ * nvlist containing the header. The header contains a version number, a disk
+ * ID, a TXG, the tile size (in bytes), the stripe width/parity of the
+ * tiles, the length of the mapping (in bytes), the pool guid, and the
+ * checksum of the mapping. This header block has an embedded checksum that
+ * uses the normal ZIO_CHECKSUM_LABEL algorithm.
+ *
+ * Then, there is a tile of size VDEV_ANYRAID_MAP_SIZE. This stores the actual
+ * mapping. It is a series of entries. Right now, there are two entry types:
+ *
+ * 0: Skip entries represent a gap in logical tile IDs. From the current
+ * tile ID, add the value stored in the upper 24 bits of the skip entry.
+ *
+ * 1: Location entries represent a mapped tile. Each one represents a single
+ * physical tile backing the current logical tile. There can be multiple
+ * physical tiles for one logical tile; that number is the stripe width/
+ * parity from the header. These entries contain an 8-bit disk ID and a
+ * 16-bit offset on that disk.
+ *
+ * Here is an example of what the mapping looks like on disk. This is for a
+ * 1-parity mirror anyraid device:
+ *
+ * +----------+----------+----------+----------+----------+----------+
+ * | Tile 0   | Tile 0   | Tile 1   | Tile 1   | Tile 2   | Tile 2   |
+ * | Parity 0 | Parity 1 | Parity 0 | Parity 1 | Parity 0 | Parity 1 |
+ * | Disk 0   | Disk 1   | Disk 0   | Disk 2   | Disk 0   | Disk 1   |
+ * | Offset 0 | Offset 0 | Offset 1 | Offset 0 | Offset 2 | Offset 1 |
+ * +----------+----------+----------+----------+----------+----------+
+ *
+ * Note that each of these entries actually only contains the "disk" and
+ * "offset" fields on-disk; the "tile" and "parity" information is derived from
+ * context (since the entries are stored in tile/offset order, with no gaps
+ * unless a skip entry is present).
+ *
+ * New entry types will be added eventually to store information like parity
+ * changes.
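+ *
+ * As a rough, non-normative sketch, a reader of this layout walks the
+ * entries four bytes at a time, reconstructing the tile/parity context:
+ *
+ *	tile = 0; copies = 0;
+ *	for (each 4-byte entry e) {
+ *		if (type(e) == AMET_SKIP) {
+ *			tile += skip_count(e);
+ *		} else {
+ *			record(tile, disk(e), offset(e));
+ *			if (++copies == parity + 1) {
+ *				copies = 0;
+ *				tile++;
+ *			}
+ *		}
+ *	}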
+ *
+ * Because the mapping can be larger than the SPA_MAXBLOCKSIZE, it has to be
+ * written in multiple IOs; each IO-sized region has its own checksum, which
+ * is stored in the header block (using the ZIO_CHECKSUM_ANYRAID_MAP algorithm).
+ */
+
+/*
+ * ==========================================================================
+ * Header-related definitions
+ * ==========================================================================
+ */
+#define VDEV_ANYRAID_HEADER_VERSION "version"
+#define VDEV_ANYRAID_HEADER_DISK "disk"
+#define VDEV_ANYRAID_HEADER_TXG "txg"
+#define VDEV_ANYRAID_HEADER_TILE_SIZE "tile_size"
+#define VDEV_ANYRAID_HEADER_LENGTH "length"
+#define VDEV_ANYRAID_HEADER_CHECKPOINT "checkpoint_txg"
+#define VDEV_ANYRAID_HEADER_DISK_SIZES "sizes"
+/*
+ * We store the pool guid to prevent disks being reused from an old pool from
+ * causing any issues.
+ */
+#define VDEV_ANYRAID_HEADER_GUID "guid"
+
+#define VDEV_ANYRAID_MAP_HEADER_SIZE(ashift) MAX(8 * 1024, 1ULL << (ashift))
+
+#define VDEV_ANYRAID_NVL_BYTES(ashift) \
+	(VDEV_ANYRAID_MAP_HEADER_SIZE(ashift) - \
+	(VDEV_ANYRAID_MAP_COPIES + 1) * sizeof (zio_eck_t))
+
+/*
+ * ==========================================================================
+ * Mapping-related definitions
+ * ==========================================================================
+ */
+typedef enum anyraid_map_entry_type {
+	AMET_SKIP = 0,
+	AMET_LOC = 1,
+	AMET_TYPES
+} anyraid_map_entry_type_t;
+
+/*
+ * ==========================================================================
+ * Skip entry definitions and functions
+ * ==========================================================================
+ */
+typedef struct anyraid_map_skip_entry {
+	union {
+		uint8_t amse_type;
+		uint32_t amse_skip_count; // tile count to skip ahead
+	} amse_u;
+} anyraid_map_skip_entry_t;
+
+#define AMSE_TILE_BITS 24
+
+static inline void
+amse_set_type(anyraid_map_skip_entry_t *amse)
+{
+	amse->amse_u.amse_type = AMET_SKIP;
+	ASSERT3U(amse->amse_u.amse_type, ==,
+	    BF32_GET(amse->amse_u.amse_type, 0, 8));
+}
+
+static inline void
+amse_set_skip_count(anyraid_map_skip_entry_t *amse, uint32_t skip_count)
+{
+	BF32_SET(amse->amse_u.amse_skip_count, 8, AMSE_TILE_BITS, skip_count);
+}
+
+static inline uint32_t
+amse_get_skip_count(anyraid_map_skip_entry_t *amse)
+{
+	return (BF32_GET(amse->amse_u.amse_skip_count, 8, AMSE_TILE_BITS));
+}
+
+/*
+ * ==========================================================================
+ * Location entry definitions and functions
+ * ==========================================================================
+ */
+typedef struct anyraid_map_loc_entry {
+	uint8_t amle_type;
+	uint8_t amle_disk;
+	uint16_t amle_offset;
+} anyraid_map_loc_entry_t;
+_Static_assert(sizeof (anyraid_map_loc_entry_t) == sizeof (uint32_t), "");
+
+/*
+ * ==========================================================================
+ * Overall mapping definitions
+ * ==========================================================================
+ */
+
+typedef struct anyraid_map_entry {
+	union {
+		anyraid_map_skip_entry_t ame_amse;
+		anyraid_map_loc_entry_t ame_amle;
+	} ame_u;
+} anyraid_map_entry_t;
+
+#define VDEV_ANYRAID_MAX_DISKS (1 << 8)
+#define VDEV_ANYRAID_MAX_TPD (1 << 16)
+#define VDEV_ANYRAID_MAX_TILES (VDEV_ANYRAID_MAX_DISKS * VDEV_ANYRAID_MAX_TPD)
+/*
+ * The worst case scenario here is that we have a loc entry for every single
+ * tile (0 skips). At that point, we're using 4 bytes per tile.
+ * That gives us 2^24 * 4 bytes = 64 MB to store the entire map. + */ +#define VDEV_ANYRAID_MAP_SIZE (sizeof (anyraid_map_loc_entry_t) * \ + VDEV_ANYRAID_MAX_TILES) +#define VDEV_ANYRAID_SINGLE_MAP_SIZE(ashift) \ + ((VDEV_ANYRAID_MAP_HEADER_SIZE(ashift) + VDEV_ANYRAID_MAP_SIZE)) +#define VDEV_ANYRAID_MAP_COPIES 4 +#define VDEV_ANYRAID_TOTAL_MAP_SIZE(ashift) (VDEV_ANYRAID_MAP_COPIES * \ + VDEV_ANYRAID_SINGLE_MAP_SIZE(ashift)) + +_Static_assert(VDEV_ANYRAID_TOTAL_MAP_SIZE(9) % SPA_MINBLOCKSIZE == 0, ""); +_Static_assert(VDEV_ANYRAID_TOTAL_MAP_SIZE(12) % SPA_MINBLOCKSIZE == 0, ""); +_Static_assert(VDEV_ANYRAID_MAP_SIZE % SPA_MAXBLOCKSIZE == 0, ""); + +/* + * ========================================================================== + * Externally-accessed function definitions + * ========================================================================== + */ +void vdev_anyraid_write_map_sync(vdev_t *vd, zio_t *pio, uint64_t txg, + uint64_t *good_writes, int flags, vdev_config_sync_status_t status); + +uint64_t vdev_anyraid_min_newsize(vdev_t *vd, uint64_t ashift); +void vdev_anyraid_expand(vdev_t *tvd, vdev_t *newvd); +boolean_t vdev_anyraid_mapped(vdev_t *vd, uint64_t offset); + +/* + * These functions are exposed for ZDB. + */ + +typedef struct anyraid_header { + abd_t *ah_abd; + char *ah_buf; + nvlist_t *ah_nvl; +} anyraid_header_t; + +int vdev_anyraid_pick_best_mapping(vdev_t *cvd, + uint64_t *out_txg, anyraid_header_t *out_header, int *out_mapping); +int vdev_anyraid_open_header(vdev_t *cvd, int header, + anyraid_header_t *out_header); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_VDEV_ANYRAID_H */ diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h index 1ee94a8cc5c0..f58683ecd9bf 100644 --- a/include/sys/vdev_impl.h +++ b/include/sys/vdev_impl.h @@ -619,6 +619,7 @@ extern vdev_ops_t vdev_missing_ops; extern vdev_ops_t vdev_hole_ops; extern vdev_ops_t vdev_spare_ops; extern vdev_ops_t vdev_indirect_ops; +extern vdev_ops_t vdev_anyraid_ops; extern zio_vsd_ops_t vdev_mirror_vsd_ops; diff --git a/include/sys/vdev_mirror.h b/include/sys/vdev_mirror.h index 0057d6cbfdba..f48cc333e8e0 100644 --- a/include/sys/vdev_mirror.h +++ b/include/sys/vdev_mirror.h @@ -19,8 +19,13 @@ * * CDDL HEADER END */ +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ /* + * Copyright (c) 2012, 2015 by Delphix. All rights reserved. * Copyright (c) 2025, Klara Inc. */ diff --git a/include/sys/zio.h b/include/sys/zio.h index acb0a03a36b2..55d7f8bf4f77 100644 --- a/include/sys/zio.h +++ b/include/sys/zio.h @@ -107,6 +107,7 @@ enum zio_checksum { ZIO_CHECKSUM_SKEIN, ZIO_CHECKSUM_EDONR, ZIO_CHECKSUM_BLAKE3, + ZIO_CHECKSUM_ANYRAID_MAP, ZIO_CHECKSUM_FUNCTIONS }; @@ -213,6 +214,7 @@ typedef uint64_t zio_flag_t; #define ZIO_FLAG_NODATA (1ULL << 12) #define ZIO_FLAG_INDUCE_DAMAGE (1ULL << 13) #define ZIO_FLAG_ALLOC_THROTTLED (1ULL << 14) +#define ZIO_FLAG_ZILWRITE (1ULL << 15) #define ZIO_FLAG_DDT_INHERIT (ZIO_FLAG_IO_RETRY - 1) #define ZIO_FLAG_GANG_INHERIT (ZIO_FLAG_IO_RETRY - 1) @@ -220,29 +222,29 @@ typedef uint64_t zio_flag_t; /* * Flags inherited by vdev children. 
*/ -#define ZIO_FLAG_IO_RETRY (1ULL << 15) /* must be first for INHERIT */ -#define ZIO_FLAG_PROBE (1ULL << 16) -#define ZIO_FLAG_TRYHARD (1ULL << 17) -#define ZIO_FLAG_OPTIONAL (1ULL << 18) -#define ZIO_FLAG_DIO_READ (1ULL << 19) +#define ZIO_FLAG_IO_RETRY (1ULL << 16) /* must be first for INHERIT */ +#define ZIO_FLAG_PROBE (1ULL << 17) +#define ZIO_FLAG_TRYHARD (1ULL << 18) +#define ZIO_FLAG_OPTIONAL (1ULL << 19) +#define ZIO_FLAG_DIO_READ (1ULL << 20) #define ZIO_FLAG_VDEV_INHERIT (ZIO_FLAG_DONT_QUEUE - 1) /* * Flags not inherited by any children. */ -#define ZIO_FLAG_DONT_QUEUE (1ULL << 20) /* must be first for INHERIT */ -#define ZIO_FLAG_DONT_PROPAGATE (1ULL << 21) -#define ZIO_FLAG_IO_BYPASS (1ULL << 22) -#define ZIO_FLAG_IO_REWRITE (1ULL << 23) -#define ZIO_FLAG_RAW_COMPRESS (1ULL << 24) -#define ZIO_FLAG_RAW_ENCRYPT (1ULL << 25) -#define ZIO_FLAG_GANG_CHILD (1ULL << 26) -#define ZIO_FLAG_DDT_CHILD (1ULL << 27) -#define ZIO_FLAG_GODFATHER (1ULL << 28) -#define ZIO_FLAG_NOPWRITE (1ULL << 29) -#define ZIO_FLAG_REEXECUTED (1ULL << 30) -#define ZIO_FLAG_DELEGATED (1ULL << 31) -#define ZIO_FLAG_PREALLOCATED (1ULL << 32) +#define ZIO_FLAG_DONT_QUEUE (1ULL << 21) /* must be first for INHERIT */ +#define ZIO_FLAG_DONT_PROPAGATE (1ULL << 22) +#define ZIO_FLAG_IO_BYPASS (1ULL << 23) +#define ZIO_FLAG_IO_REWRITE (1ULL << 24) +#define ZIO_FLAG_RAW_COMPRESS (1ULL << 25) +#define ZIO_FLAG_RAW_ENCRYPT (1ULL << 26) +#define ZIO_FLAG_GANG_CHILD (1ULL << 27) +#define ZIO_FLAG_DDT_CHILD (1ULL << 28) +#define ZIO_FLAG_GODFATHER (1ULL << 29) +#define ZIO_FLAG_NOPWRITE (1ULL << 30) +#define ZIO_FLAG_REEXECUTED (1ULL << 31) +#define ZIO_FLAG_DELEGATED (1ULL << 32) +#define ZIO_FLAG_PREALLOCATED (1ULL << 33) #define ZIO_ALLOCATOR_NONE (-1) #define ZIO_HAS_ALLOCATOR(zio) ((zio)->io_allocator != ZIO_ALLOCATOR_NONE) diff --git a/include/sys/zio_checksum.h b/include/sys/zio_checksum.h index f07ad2605e31..b68c712943c4 100644 --- a/include/sys/zio_checksum.h +++ b/include/sys/zio_checksum.h @@ -140,8 +140,8 @@ extern int zio_checksum_equal(spa_t *, blkptr_t *, enum zio_checksum, void *, uint64_t, uint64_t, zio_bad_cksum_t *); extern void zio_checksum_compute(zio_t *, enum zio_checksum, struct abd *, uint64_t); -extern int zio_checksum_error_impl(spa_t *, const blkptr_t *, enum zio_checksum, - struct abd *, uint64_t, uint64_t, zio_bad_cksum_t *); +extern int zio_checksum_error_impl(zio_t *, enum zio_checksum, struct abd *, + uint64_t, uint64_t, zio_bad_cksum_t *); extern int zio_checksum_error(zio_t *zio, zio_bad_cksum_t *out); extern enum zio_checksum spa_dedup_checksum(spa_t *spa); extern void zio_checksum_templates_free(spa_t *spa); diff --git a/include/zfeature_common.h b/include/zfeature_common.h index 56382ca85b55..c44671673fbd 100644 --- a/include/zfeature_common.h +++ b/include/zfeature_common.h @@ -90,6 +90,7 @@ typedef enum spa_feature { SPA_FEATURE_DYNAMIC_GANG_HEADER, SPA_FEATURE_BLOCK_CLONING_ENDIAN, SPA_FEATURE_PHYSICAL_REWRITE, + SPA_FEATURE_ANYRAID, SPA_FEATURES } spa_feature_t; diff --git a/lib/libzfs/libzfs.abi b/lib/libzfs/libzfs.abi index 232265237f54..479e32b5469b 100644 --- a/lib/libzfs/libzfs.abi +++ b/lib/libzfs/libzfs.abi @@ -640,7 +640,7 @@ - + @@ -6027,7 +6027,10 @@ - + + + + @@ -6310,7 +6313,8 @@ - + + @@ -9521,8 +9525,8 @@ - - + + @@ -9600,7 +9604,7 @@ - + diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index 756d701e2d97..4f332329f48d 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -1614,6 +1614,18 @@ zpool_create(libzfs_handle_t *hdl, const 
char *pool, nvlist_t *nvroot, "minimum size (%s)"), buf); } return (zfs_error(hdl, EZFS_BADDEV, errbuf)); + case ENOLCK: + /* + * This occurs when one of the devices is an anyraid + * device that can't hold a single tile. + * Unfortunately, we can't detect which device was the + * problem device since there's no reliable way to + * determine device size from userland. + */ + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "one or more anyraid devices cannot store " + "any tiles")); + return (zfs_error(hdl, EZFS_BADDEV, errbuf)); case ENOSPC: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, @@ -1848,7 +1860,18 @@ zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot, boolean_t check_ashift) } (void) zfs_error(hdl, EZFS_BADDEV, errbuf); break; - + case ENOLCK: + /* + * This occurs when one of the devices is an anyraid + * device that can't hold a single tile. + * Unfortunately, we can't detect which device was the + * problem device since there's no reliable way to + * determine device size from userland. + */ + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "one or more anyraid devices cannot store " + "any tiles")); + return (zfs_error(hdl, EZFS_BADDEV, errbuf)); case ENOTSUP: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be upgraded to add these vdevs")); @@ -3197,7 +3220,8 @@ zpool_vdev_is_interior(const char *name) strncmp(name, VDEV_TYPE_REPLACING, strlen(VDEV_TYPE_REPLACING)) == 0 || strncmp(name, VDEV_TYPE_ROOT, strlen(VDEV_TYPE_ROOT)) == 0 || - strncmp(name, VDEV_TYPE_MIRROR, strlen(VDEV_TYPE_MIRROR)) == 0) + strncmp(name, VDEV_TYPE_MIRROR, strlen(VDEV_TYPE_MIRROR)) == 0 || + strncmp(name, VDEV_TYPE_ANYRAID, strlen(VDEV_TYPE_ANYRAID)) == 0) return (B_TRUE); if (strncmp(name, VDEV_TYPE_DRAID, strlen(VDEV_TYPE_DRAID)) == 0 && @@ -3774,6 +3798,15 @@ zpool_vdev_attach(zpool_handle_t *zhp, const char *old_disk, (void) zfs_error(hdl, EZFS_BADDEV, errbuf); break; + case ENOLCK: + /* + * This occurs when one of the devices is an anyraid + * device that can't hold a single tile. + */ + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "new device cannot store any tiles")); + return (zfs_error(hdl, EZFS_BADDEV, errbuf)); + case ENAMETOOLONG: /* * The resulting top-level vdev spec won't fit in the label. @@ -4557,9 +4590,11 @@ zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv, path = type; /* - * If it's a raidz device, we need to stick in the parity level. + * If it's a raidz or anyraid device, we need to stick in the + * parity level. */ - if (strcmp(path, VDEV_TYPE_RAIDZ) == 0) { + if (strcmp(path, VDEV_TYPE_RAIDZ) == 0 || + strcmp(path, VDEV_TYPE_ANYRAID) == 0) { value = fnvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY); (void) snprintf(buf, sizeof (buf), "%s%llu", path, (u_longlong_t)value); diff --git a/lib/libzpool/Makefile.am b/lib/libzpool/Makefile.am index aeacc595b363..7097e7053e2b 100644 --- a/lib/libzpool/Makefile.am +++ b/lib/libzpool/Makefile.am @@ -148,6 +148,7 @@ nodist_libzpool_la_SOURCES = \ module/zfs/vdev_label.c \ module/zfs/vdev_mirror.c \ module/zfs/vdev_missing.c \ + module/zfs/vdev_anyraid.c \ module/zfs/vdev_queue.c \ module/zfs/vdev_raidz.c \ module/zfs/vdev_raidz_math.c \ diff --git a/man/man8/zdb.8 b/man/man8/zdb.8 index f51e24fa849c..a77bf112a13b 100644 --- a/man/man8/zdb.8 +++ b/man/man8/zdb.8 @@ -95,6 +95,9 @@ .Op Fl e Oo Fl V Oc Oo Fl p Ar path Oc Ns … .Op Fl U Ar cache .Ar poolname +.Nm +.Fl -anyraid-map +.Ar poolname Op Ar vdev Ns … . .Sh DESCRIPTION The @@ -440,6 +443,8 @@ Display histograms of per-vdev BRT refcounts. 
Dump the contents of the block reference tables. .It Fl u , -uberblock Display the current uberblock. +.It Fl -anyraid-map +Display information about the mappings of one or all anyraid vdevs in the pool. .El .Pp Other options: diff --git a/module/Kbuild.in b/module/Kbuild.in index 95313c984178..5958b76476cd 100644 --- a/module/Kbuild.in +++ b/module/Kbuild.in @@ -385,6 +385,7 @@ ZFS_OBJS := \ vdev_label.o \ vdev_mirror.o \ vdev_missing.o \ + vdev_anyraid.o \ vdev_queue.o \ vdev_raidz.o \ vdev_raidz_math.o \ diff --git a/module/Makefile.bsd b/module/Makefile.bsd index c20fdc0c483b..870f54a4d970 100644 --- a/module/Makefile.bsd +++ b/module/Makefile.bsd @@ -335,6 +335,7 @@ SRCS+= abd.c \ vdev_label.c \ vdev_mirror.c \ vdev_missing.c \ + vdev_anyraid.c \ vdev_queue.c \ vdev_raidz.c \ vdev_raidz_math_avx2.c \ diff --git a/module/os/freebsd/zfs/sysctl_os.c b/module/os/freebsd/zfs/sysctl_os.c index 393bfaa65ff5..b1fe3e1800f3 100644 --- a/module/os/freebsd/zfs/sysctl_os.c +++ b/module/os/freebsd/zfs/sysctl_os.c @@ -93,6 +93,8 @@ #include SYSCTL_DECL(_vfs_zfs); +SYSCTL_NODE(_vfs_zfs, OID_AUTO, anyraid, CTLFLAG_RW, 0, + "ZFS AnyRAID VDEV"); SYSCTL_NODE(_vfs_zfs, OID_AUTO, arc, CTLFLAG_RW, 0, "ZFS adaptive replacement cache"); SYSCTL_NODE(_vfs_zfs, OID_AUTO, brt, CTLFLAG_RW, 0, diff --git a/module/zcommon/zfeature_common.c b/module/zcommon/zfeature_common.c index 6ba9892eeb64..e3a96fad32ee 100644 --- a/module/zcommon/zfeature_common.c +++ b/module/zcommon/zfeature_common.c @@ -810,6 +810,10 @@ zpool_feature_init(void) ZFEATURE_TYPE_BOOLEAN, physical_rewrite_deps, sfeatures); } + zfeature_register(SPA_FEATURE_ANYRAID, + "com.klarasystems:anyraid", "anyraid", "Support for anyraid VDEV", + ZFEATURE_FLAG_MOS, ZFEATURE_TYPE_BOOLEAN, NULL, sfeatures); + zfs_mod_list_supported_free(sfeatures); } diff --git a/module/zfs/arc.c b/module/zfs/arc.c index b864d9035974..f9c766aff0c7 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -1507,9 +1507,8 @@ arc_cksum_is_equal(arc_buf_hdr_t *hdr, zio_t *zio) * generated using the correct checksum algorithm and accounts for the * logical I/O size and not just a gang fragment. 
 */
-	return (zio_checksum_error_impl(zio->io_spa, zio->io_bp,
-	    BP_GET_CHECKSUM(zio->io_bp), zio->io_abd, zio->io_size,
-	    zio->io_offset, NULL) == 0);
+	return (zio_checksum_error_impl(zio, BP_GET_CHECKSUM(zio->io_bp),
+	    zio->io_abd, zio->io_size, zio->io_offset, NULL) == 0);
 }
 
 /*
diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c
index 5690f8afad00..19b7527d4376 100644
--- a/module/zfs/dmu.c
+++ b/module/zfs/dmu.c
@@ -2052,7 +2052,8 @@ dmu_sync_late_arrival(zio_t *pio, objset_t *os, dmu_sync_cb_t *done, zgd_t *zgd,
 	    abd_get_from_buf(zgd->zgd_db->db_data, zgd->zgd_db->db_size),
 	    zgd->zgd_db->db_size, zgd->zgd_db->db_size, zp,
 	    dmu_sync_late_arrival_ready, NULL, dmu_sync_late_arrival_done,
-	    dsa, ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, zb));
+	    dsa, ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL | ZIO_FLAG_ZILWRITE,
+	    zb));
 
 	return (0);
 }
@@ -2220,8 +2221,8 @@ dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd)
 	zio_nowait(arc_write(pio, os->os_spa, txg, zgd->zgd_bp,
 	    dr->dt.dl.dr_data, !DBUF_IS_CACHEABLE(db),
 	    dbuf_is_l2cacheable(db, NULL), &zp, dmu_sync_ready, NULL,
-	    dmu_sync_done, dsa, ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL,
-	    &zb));
+	    dmu_sync_done, dsa, ZIO_PRIORITY_SYNC_WRITE,
+	    ZIO_FLAG_CANFAIL | ZIO_FLAG_ZILWRITE, &zb));
 
 	return (0);
 }
diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c
index 34c39c846a94..b2ada5e980b6 100644
--- a/module/zfs/metaslab.c
+++ b/module/zfs/metaslab.c
@@ -34,6 +34,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -3250,7 +3251,8 @@ metaslab_space_weight(metaslab_t *msp)
 	 * In effect, this means that we'll select the metaslab with the most
 	 * free bandwidth rather than simply the one with the most free space.
 	 */
-	if (!vd->vdev_nonrot && metaslab_lba_weighting_enabled) {
+	if ((!vd->vdev_nonrot && metaslab_lba_weighting_enabled) ||
+	    vd->vdev_ops == &vdev_anyraid_ops) {
 		weight = 2 * weight - (msp->ms_id * weight) / vd->vdev_ms_count;
 		ASSERT(weight >= space && weight <= 2 * space);
 	}
@@ -3417,8 +3419,13 @@ metaslab_segment_weight(metaslab_t *msp)
 	 * that case specifically.
 	 */
 	vdev_t *vd = mg->mg_vd;
-	if (B_FALSE) {
-		weight = 2 * weight - (msp->ms_id * weight) / vd->vdev_ms_count;
+	if ((vd->vdev_ops == &vdev_anyraid_ops ||
+	    metaslab_lba_weighting_enabled) &&
+	    WEIGHT_GET_INDEX(weight) > SPA_MAXBLOCKSHIFT) {
+		uint64_t id = msp->ms_id;
+		uint64_t count = vd->vdev_ms_count;
+		WEIGHT_SET_INDEX(weight, WEIGHT_GET_INDEX(weight) + 3 -
+		    ((id * 4) / count));
 		weight = MIN(weight, METASLAB_MAX_WEIGHT);
 	}
 
@@ -3442,7 +3449,8 @@ metaslab_segment_weight(metaslab_t *msp)
 * weights we rely on the entire weight (excluding the weight-type bit).
 */
 static boolean_t
-metaslab_should_allocate(metaslab_t *msp, uint64_t asize, boolean_t try_hard)
+metaslab_should_allocate(metaslab_t *msp, uint64_t asize, boolean_t try_hard,
+    boolean_t mapped)
 {
 	/*
 	 * This case will usually but not always get caught by the checks below;
@@ -3453,6 +3461,17 @@ metaslab_should_allocate(metaslab_t *msp, uint64_t asize, boolean_t try_hard)
 	if (unlikely(msp->ms_new))
 		return (B_FALSE);
 
+	/*
+	 * This I/O needs to be written to a stable location and be retrievable
+	 * before the next TXG syncs. This is the case for ZIL writes. In that
+	 * case, if we're using an anyraid vdev, we can't use a tile that isn't
+	 * mapped yet.
+	 */
+	if (mapped && msp->ms_group->mg_vd->vdev_ops == &vdev_anyraid_ops) {
+		return (vdev_anyraid_mapped(msp->ms_group->mg_vd,
+		    msp->ms_start));
+	}
+
 	/*
 	 * If the metaslab is loaded, ms_max_size is definitive and we can use
	 * the fast check.
If it's not, the ms_max_size is a lower bound (once @@ -4903,8 +4922,8 @@ metaslab_block_alloc(metaslab_t *msp, uint64_t size, uint64_t max_size, static metaslab_t * find_valid_metaslab(metaslab_group_t *mg, uint64_t activation_weight, dva_t *dva, int d, uint64_t asize, int allocator, - boolean_t try_hard, zio_alloc_list_t *zal, metaslab_t *search, - boolean_t *was_active) + boolean_t try_hard, boolean_t mapped, zio_alloc_list_t *zal, + metaslab_t *search, boolean_t *was_active) { avl_index_t idx; avl_tree_t *t = &mg->mg_metaslab_tree; @@ -4922,7 +4941,7 @@ find_valid_metaslab(metaslab_group_t *mg, uint64_t activation_weight, } tries++; - if (!metaslab_should_allocate(msp, asize, try_hard)) { + if (!metaslab_should_allocate(msp, asize, try_hard, mapped)) { metaslab_trace_add(zal, mg, msp, asize, d, TRACE_TOO_SMALL, allocator); continue; @@ -5003,7 +5022,7 @@ metaslab_active_mask_verify(metaslab_t *msp) static uint64_t metaslab_group_alloc(metaslab_group_t *mg, zio_alloc_list_t *zal, uint64_t asize, uint64_t max_asize, uint64_t txg, - dva_t *dva, int d, int allocator, boolean_t try_hard, + dva_t *dva, int d, int allocator, boolean_t try_hard, boolean_t mapped, uint64_t *actual_asize) { metaslab_t *msp = NULL; @@ -5079,7 +5098,7 @@ metaslab_group_alloc(metaslab_group_t *mg, zio_alloc_list_t *zal, ASSERT(msp->ms_weight & METASLAB_ACTIVE_MASK); } else { msp = find_valid_metaslab(mg, activation_weight, dva, d, - asize, allocator, try_hard, zal, search, + asize, allocator, try_hard, mapped, zal, search, &was_active); } @@ -5185,7 +5204,7 @@ metaslab_group_alloc(metaslab_group_t *mg, zio_alloc_list_t *zal, * can accurately determine if the allocation attempt should * proceed. */ - if (!metaslab_should_allocate(msp, asize, try_hard)) { + if (!metaslab_should_allocate(msp, asize, try_hard, mapped)) { /* Passivate this metaslab and select a new one. */ metaslab_trace_add(zal, mg, msp, asize, d, TRACE_TOO_SMALL, allocator); @@ -5279,7 +5298,7 @@ metaslab_group_alloc(metaslab_group_t *mg, zio_alloc_list_t *zal, * we may end up in an infinite loop retrying the same * metaslab. 
*/ - ASSERT(!metaslab_should_allocate(msp, asize, try_hard)); + ASSERT(!metaslab_should_allocate(msp, asize, try_hard, mapped)); mutex_exit(&msp->ms_lock); } @@ -5434,8 +5453,12 @@ metaslab_alloc_dva_range(spa_t *spa, metaslab_class_t *mc, uint64_t psize, uint64_t max_asize = vdev_psize_to_asize_txg(vd, max_psize, txg); ASSERT0(P2PHASE(max_asize, 1ULL << vd->vdev_ashift)); + boolean_t mapped = B_FALSE; + if (flags & METASLAB_ZIL) + mapped = B_TRUE; + uint64_t offset = metaslab_group_alloc(mg, zal, asize, - max_asize, txg, dva, d, allocator, try_hard, + max_asize, txg, dva, d, allocator, try_hard, mapped, &asize); if (offset != -1ULL) { diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 957370afb461..623a064187e6 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -67,6 +67,7 @@ #include #include #include +#include #include #include #include @@ -6892,6 +6893,10 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, for (int i = 0; i < ndraid; i++) spa_feature_incr(spa, SPA_FEATURE_DRAID, tx); + for (uint64_t i = 0; i < rvd->vdev_children; i++) + if (rvd->vdev_child[i]->vdev_ops == &vdev_anyraid_ops) + spa_feature_incr(spa, SPA_FEATURE_ANYRAID, tx); + dmu_tx_commit(tx); spa->spa_sync_on = B_TRUE; @@ -7485,13 +7490,26 @@ spa_draid_feature_incr(void *arg, dmu_tx_t *tx) spa_feature_incr(spa, SPA_FEATURE_DRAID, tx); } +/* + * This is called as a synctask to increment the anyraid feature flag + */ +static void +spa_anyraid_feature_incr(void *arg, dmu_tx_t *tx) +{ + spa_t *spa = dmu_tx_pool(tx)->dp_spa; + uint64_t nanyraid = (uint64_t)(uintptr_t)arg; + + for (int i = 0; i < nanyraid; i++) + spa_feature_incr(spa, SPA_FEATURE_ANYRAID, tx); +} + /* * Add a device to a storage pool. */ int spa_vdev_add(spa_t *spa, nvlist_t *nvroot, boolean_t check_ashift) { - uint64_t txg, ndraid = 0; + uint64_t txg, ndraid = 0, nanyraid = 0; int error; vdev_t *rvd = spa->spa_root_vdev; vdev_t *vd, *tvd; @@ -7625,6 +7643,19 @@ spa_vdev_add(spa_t *spa, nvlist_t *nvroot, boolean_t check_ashift) dmu_tx_commit(tx); } + for (uint64_t i = 0; i < vd->vdev_children; i++) + if (vd->vdev_child[i]->vdev_ops == &vdev_anyraid_ops) + nanyraid++; + if (nanyraid > 0) { + dmu_tx_t *tx; + + tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg); + dsl_sync_task_nowait(spa->spa_dsl_pool, + spa_anyraid_feature_incr, + (void *)(uintptr_t)nanyraid, tx); + dmu_tx_commit(tx); + } + /* * We have to be careful when adding new vdevs to an existing pool. 
* If other threads start allocating from these vdevs before we @@ -7791,6 +7822,7 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing, return (spa_vdev_exit(spa, NULL, txg, ENODEV)); boolean_t raidz = oldvd->vdev_ops == &vdev_raidz_ops; + boolean_t anyraid = oldvd->vdev_ops == &vdev_anyraid_ops; if (raidz) { if (!spa_feature_is_enabled(spa, SPA_FEATURE_RAIDZ_EXPANSION)) @@ -7803,11 +7835,11 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing, return (spa_vdev_exit(spa, NULL, txg, ZFS_ERR_RAIDZ_EXPAND_IN_PROGRESS)); } - } else if (!oldvd->vdev_ops->vdev_op_leaf) { + } else if (!anyraid && !oldvd->vdev_ops->vdev_op_leaf) { return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); } - if (raidz) + if (raidz || anyraid) pvd = oldvd; else pvd = oldvd->vdev_parent; @@ -7868,10 +7900,13 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing, */ if (pvd->vdev_ops != &vdev_mirror_ops && pvd->vdev_ops != &vdev_root_ops && - !raidz) + !raidz && !anyraid) return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); - pvops = &vdev_mirror_ops; + if (anyraid) + pvops = &vdev_anyraid_ops; + else + pvops = &vdev_mirror_ops; } else { /* * Active hot spares can only be replaced by inactive hot @@ -7914,7 +7949,9 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing, * Make sure the new device is big enough. */ vdev_t *min_vdev = raidz ? oldvd->vdev_child[0] : oldvd; - if (newvd->vdev_asize < vdev_get_min_asize(min_vdev)) + if ((anyraid && newvd->vdev_asize < vdev_anyraid_min_newsize(min_vdev, + newvd->vdev_ashift)) || + (!anyraid && newvd->vdev_asize < vdev_get_min_asize(min_vdev))) return (spa_vdev_exit(spa, newrootvd, txg, EOVERFLOW)); /* @@ -7961,6 +7998,11 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing, (uint_t)vdev_get_nparity(oldvd), (uint_t)oldvd->vdev_id); oldvdpath = spa_strdup(tmp); kmem_strfree(tmp); + } else if (anyraid) { + char *tmp = kmem_asprintf(VDEV_TYPE_ANYRAID "%u-%u", + (uint_t)vdev_get_nparity(oldvd), (uint_t)oldvd->vdev_id); + oldvdpath = spa_strdup(tmp); + kmem_strfree(tmp); } else { oldvdpath = spa_strdup(oldvd->vdev_path); } @@ -7988,7 +8030,7 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing, * If the parent is not a mirror, or if we're replacing, insert the new * mirror/replacing/spare vdev above oldvd. 
*/ - if (!raidz && pvd->vdev_ops != pvops) { + if (!raidz && !anyraid && pvd->vdev_ops != pvops) { pvd = vdev_add_parent(oldvd, pvops); ASSERT(pvd->vdev_ops == pvops); ASSERT(oldvd->vdev_parent == pvd); @@ -8046,6 +8088,13 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing, dsl_sync_task_nowait(spa->spa_dsl_pool, vdev_raidz_attach_sync, newvd, tx); dmu_tx_commit(tx); + } else if (anyraid) { + vdev_anyraid_expand(tvd, newvd); + vdev_dirty(tvd, VDD_DTL, newvd, txg); + tvd->vdev_expanding = B_TRUE; + vdev_reopen(tvd); + spa->spa_ccw_fail_time = 0; + spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); } else { vdev_dtl_dirty(newvd, DTL_MISSING, TXG_INITIAL, dtl_max_txg - TXG_INITIAL); @@ -10334,9 +10383,9 @@ spa_sync_rewrite_vdev_config(spa_t *spa, dmu_tx_t *tx) if (dmu_tx_get_txg(tx) == spa->spa_checkpoint_txg + 1) status = VDEV_CONFIG_CREATING_CHECKPOINT; else if (spa->spa_checkpoint_txg == 0) - status = VDEV_CONFIG_DISCARDING_CHECKPOINT; + status = VDEV_CONFIG_NO_CHECKPOINT; else - status = VDEV_CONFIG_NORMAL; + status = VDEV_CONFIG_KEEP_CHECKPOINT; for (;;) { int error = 0; diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index 5aafdca5eb0d..cf59f0a9acb7 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -3179,8 +3179,6 @@ EXPORT_SYMBOL(spa_guid); EXPORT_SYMBOL(spa_last_synced_txg); EXPORT_SYMBOL(spa_first_txg); EXPORT_SYMBOL(spa_syncing_txg); -EXPORT_SYMBOL(spa_load_max_txg); -EXPORT_SYMBOL(spa_current_txg); EXPORT_SYMBOL(spa_version); EXPORT_SYMBOL(spa_state); EXPORT_SYMBOL(spa_load_state); diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index beb29df2590f..fb13dea939f2 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -279,6 +279,7 @@ static vdev_ops_t *const vdev_ops_table[] = { &vdev_missing_ops, &vdev_hole_ops, &vdev_indirect_ops, + &vdev_anyraid_ops, NULL }; @@ -904,6 +905,13 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id, !spa_feature_is_enabled(spa, SPA_FEATURE_DRAID)) { return (SET_ERROR(ENOTSUP)); } + + /* spa_vdev_add() expects feature to be enabled */ + if (ops == &vdev_anyraid_ops && + spa->spa_load_state != SPA_LOAD_CREATE && + !spa_feature_is_enabled(spa, SPA_FEATURE_ANYRAID)) { + return (SET_ERROR(ENOTSUP)); + } } /* diff --git a/module/zfs/vdev_anyraid.c b/module/zfs/vdev_anyraid.c new file mode 100644 index 000000000000..6c0a908d17c1 --- /dev/null +++ b/module/zfs/vdev_anyraid.c @@ -0,0 +1,1517 @@ +// SPDX-License-Identifier: CDDL-1.0 +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or https://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2025, Klara Inc. + */ + +/* + * Anyraid vdevs are a way to get the benefits of mirror (and, in the future, + * raidz) vdevs while using disks with mismatched sizes. 
The primary goal of + * this feature is maximizing the available space of the provided devices. + * Performance is secondary to that goal; nice to have, but not required. This + * feature is also designed to work on modern hard drives: while the feature + * will work on drives smaller than 1TB, the default tuning values are + * optimized for drives of at least that size. + * + * Anyraid works by splitting the vdev into "tiles". Each tile is the same + * size; by default, 1/64th of the size of the smallest disk in the vdev, or + * 16GiB, whichever is larger. A tile represents an area of + * logical-to-physical mapping: bytes within that logical tile are stored + * physically together. Subsequent tiles may be stored in different locations + * on the same disk, or different disks altogether. A mapping is stored on each + * disk to enable the vdev to be read normally. + * + * When parity is not considered, this provides some small benefits (device + * removal within the vdev is not yet implemented, but is very feasible, as is + * rebalancing data onto new disks), but is not generally recommended. However, + * if parity is considered, it is more useful. With mirror parity P, each + * tile is allocated onto P separate disks, providing the reliability and + * performance characteristics of a mirror vdev. In addition, because each tile + * can be allocated separately, smaller drives can work together to mirror + * larger ones dynamically and seamlessly. + * + * The mapping for these tiles is stored in a special area at the start of + * each device. Each disk has 4 full copies of the tile map, which rotate + * per txg in a similar manner to uberblocks. The tile map itself is 64MiB, + * plus a small header (~8KiB) before it. + * + * The exact space that is allocatable in an anyraid vdev is not easy to + * calculate in the general case. It's a variant of the bin-packing problem, so + * an optimal solution is complex. However, this case seems to be a sub-problem + * where greedy algorithms give optimal solutions, so that is what we do here. + * Each tile is allocated from the P disks that have the most available + * capacity. This does mean that calculating the size of a disk requires + * running the allocation algorithm until completion, but for the relatively + * small number of tiles we are working with, an O(n * log n) runtime is + * acceptable. + * + * Currently, there is a limit of 2^24 tiles in an anyraid vdev: 2^8 disks, + * and 2^16 tiles per disk. This means that by default, the largest device + * that can be fully utilized by an anyraid vdev is 1024 times the size of the + * smallest device that was present during device creation. This is not a + * fundamental limit, and could be expanded in the future. However, this does + * affect the size of the tile map. Currently, the tile map can always + * store all tiles without running out of space; 2^24 4-byte entries is 2^26 + * bytes = 64MiB. Expanding the maximum number of tiles per disk or disks per + * vdev would necessarily involve either expanding the tile map or adding + * handling for the tile map running out of space. + * + * When it comes to performance, there is a tradeoff. While the per-disk I/O + * rates are equivalent to using mirrors (because only a small amount of extra + * logic is used on top of the mirror code), the overall vdev throughput may + * not be. This is because the actively used tiles may be allocated to the + * same devices, leaving other devices idle for writes. 
This is especially true + * as the variation in drive sizes increases. To some extent, this problem is + * fundamental: writes fill up disks. If we want to fill all the disks, smaller + * disks will not be able to satisfy as many writes. Rewrite- and read-heavy + * workloads will encounter this problem to a lesser extent. The performance + * downsides can be mitigated with smaller tile sizes, larger metaslabs, + * and more active metaslab allocators. + * + * Checkpoints are currently supported by storing the maximum allocated tile + * at the time of the checkpoint, and then discarding all tiles after that + * when a checkpoint is rolled back. Because device addition is forbidden while + * a checkpoint is outstanding, no more complex logic is required. + * + * Currently, anyraid vdevs only work with mirror-type parity. However, plans + * for future work include: + * Raidz-type parity + * Anyraid vdev shrinking via device removal + * Rebalancing after device addition + * + * Possible future work also includes: + * Enabling rebalancing with an outstanding checkpoint + * Trim and initialize beyond the end of the allocated tiles + * Store device asizes so we can make better allocation decisions while a + * device is faulted + */ + +#include +#include +#include +#include +#include + +/* + * The smallest allowable tile size. Shrinking this is mostly useful for + * testing. Increasing it may be useful if you plan to add much larger disks to + * an array in the future, and want to be sure their full capacity will be + * usable. + */ +uint64_t zfs_anyraid_min_tile_size = (16ULL << 30); +/* + * This controls how many tiles we have per disk (based on the smallest disk + * present at creation time) + */ +int anyraid_disk_shift = 6; + +static inline int +anyraid_tile_compare(const void *p1, const void *p2) +{ + const anyraid_tile_t *r1 = p1, *r2 = p2; + + return (TREE_CMP(r1->at_tile_id, r2->at_tile_id)); +} + +static inline int +anyraid_child_compare(const void *p1, const void *p2) +{ + const vdev_anyraid_node_t *van1 = p1, *van2 = p2; + + int cmp = TREE_CMP(van2->van_capacity - van2->van_next_offset, + van1->van_capacity - van1->van_next_offset); + if (cmp != 0) + return (cmp); + + return (TREE_CMP(van1->van_id, van2->van_id)); +} + +/* + * Initialize private VDEV specific fields from the nvlist. 
+ */ +static int +vdev_anyraid_init(spa_t *spa, nvlist_t *nv, void **tsd) +{ + (void) spa; + uint_t children; + nvlist_t **child; + int error = nvlist_lookup_nvlist_array(nv, + ZPOOL_CONFIG_CHILDREN, &child, &children); + if (error != 0 || children > UINT8_MAX) + return (SET_ERROR(EINVAL)); + + uint64_t nparity; + if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY, &nparity) != 0) + return (SET_ERROR(EINVAL)); + + vdev_anyraid_parity_type_t parity_type = VAP_TYPES; + if (nvlist_lookup_uint8(nv, ZPOOL_CONFIG_ANYRAID_PARITY_TYPE, + (uint8_t *)&parity_type) != 0) + return (SET_ERROR(EINVAL)); + if (parity_type != VAP_MIRROR) + return (SET_ERROR(ENOTSUP)); + + vdev_anyraid_t *var = kmem_zalloc(sizeof (*var), KM_SLEEP); + var->vd_parity_type = parity_type; + var->vd_nparity = nparity; + rw_init(&var->vd_lock, NULL, RW_DEFAULT, NULL); + avl_create(&var->vd_tile_map, anyraid_tile_compare, + sizeof (anyraid_tile_t), offsetof(anyraid_tile_t, at_node)); + avl_create(&var->vd_children_tree, anyraid_child_compare, + sizeof (vdev_anyraid_node_t), + offsetof(vdev_anyraid_node_t, van_node)); + + var->vd_children = kmem_zalloc(sizeof (*var->vd_children) * children, + KM_SLEEP); + for (int c = 0; c < children; c++) { + vdev_anyraid_node_t *van = kmem_zalloc(sizeof (*van), KM_SLEEP); + van->van_id = c; + avl_add(&var->vd_children_tree, van); + var->vd_children[c] = van; + } + + *tsd = var; + return (0); +} + +static void +vdev_anyraid_fini(vdev_t *vd) +{ + vdev_anyraid_t *var = vd->vdev_tsd; + avl_destroy(&var->vd_tile_map); + + vdev_anyraid_node_t *node; + void *cookie = NULL; + while ((node = avl_destroy_nodes(&var->vd_children_tree, &cookie))) { + kmem_free(node, sizeof (*node)); + } + avl_destroy(&var->vd_children_tree); + + rw_destroy(&var->vd_lock); + kmem_free(var->vd_children, + sizeof (*var->vd_children) * vd->vdev_children); + kmem_free(var, sizeof (*var)); +} + +/* + * Add ANYRAID specific fields to the config nvlist. + */ +static void +vdev_anyraid_config_generate(vdev_t *vd, nvlist_t *nv) +{ + ASSERT3P(vd->vdev_ops, ==, &vdev_anyraid_ops); + vdev_anyraid_t *var = vd->vdev_tsd; + + fnvlist_add_uint64(nv, ZPOOL_CONFIG_NPARITY, var->vd_nparity); + fnvlist_add_uint8(nv, ZPOOL_CONFIG_ANYRAID_PARITY_TYPE, + (uint8_t)var->vd_parity_type); +} + +/* + * Import/open related functions. + */ + +/* + * Add an entry to the tile map for the provided tile. 
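+ *
+ * Entries arrive in tile/parity order, so every run of vd_nparity + 1
+ * consecutive location entries describes one logical tile: pat_cnt cycles
+ * through 0..vd_nparity, and a fresh anyraid_tile_t is allocated each time
+ * it wraps back to 0.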
+ */ +static void +create_tile_entry(vdev_anyraid_t *var, anyraid_map_loc_entry_t *amle, + uint8_t *pat_cnt, anyraid_tile_t **out_ar, uint32_t *cur_tile) +{ + uint8_t disk = amle->amle_disk; + uint16_t offset = amle->amle_offset; + anyraid_tile_t *ar = *out_ar; + + if (*pat_cnt == 0) { + ar = kmem_alloc(sizeof (*ar), KM_SLEEP); + ar->at_tile_id = *cur_tile; + avl_add(&var->vd_tile_map, ar); + list_create(&ar->at_list, + sizeof (anyraid_tile_node_t), + offsetof(anyraid_tile_node_t, atn_node)); + + (*cur_tile)++; + } + + anyraid_tile_node_t *arn = kmem_alloc(sizeof (*arn), KM_SLEEP); + arn->atn_disk = disk; + arn->atn_offset = offset; + list_insert_tail(&ar->at_list, arn); + *pat_cnt = (*pat_cnt + 1) % (var->vd_nparity + 1); + + vdev_anyraid_node_t *van = var->vd_children[disk]; + avl_remove(&var->vd_children_tree, van); + van->van_next_offset = MAX(van->van_next_offset, offset + 1); + avl_add(&var->vd_children_tree, van); + *out_ar = ar; +} + +static void +child_read_done(zio_t *zio) +{ + zio_t *pio = zio_unique_parent(zio); + abd_t **cbp = pio->io_private; + + if (zio->io_error == 0) { + mutex_enter(&pio->io_lock); + if (*cbp == NULL) + *cbp = zio->io_abd; + else + abd_free(zio->io_abd); + mutex_exit(&pio->io_lock); + } else { + abd_free(zio->io_abd); + } +} + +static void +child_read(zio_t *zio, vdev_t *vd, uint64_t offset, uint64_t size, + int checksum, void *private, int flags) +{ + for (int c = 0; c < vd->vdev_children; c++) { + child_read(zio, vd->vdev_child[c], offset, size, checksum, + private, flags); + } + + if (vd->vdev_ops->vdev_op_leaf && vdev_readable(vd)) { + zio_nowait(zio_read_phys(zio, vd, offset, size, + abd_alloc_linear(size, B_TRUE), checksum, + child_read_done, private, ZIO_PRIORITY_SYNC_READ, flags, + B_FALSE)); + } +} + +/* + * This function is non-static for ZDB, and shouldn't be used for anything else. + * Utility function that issues the read for the header and parses out the + * nvlist. + */ +int +vdev_anyraid_open_header(vdev_t *cvd, int header, anyraid_header_t *out_header) +{ + spa_t *spa = cvd->vdev_spa; + uint64_t ashift = cvd->vdev_ashift; + uint64_t header_offset = VDEV_LABEL_START_SIZE + + header * VDEV_ANYRAID_SINGLE_MAP_SIZE(ashift); + uint64_t header_size = VDEV_ANYRAID_MAP_HEADER_SIZE(ashift); + int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL | + ZIO_FLAG_SPECULATIVE; + + abd_t *header_abd = NULL; + zio_t *rio = zio_root(spa, NULL, &header_abd, flags); + child_read(rio, cvd, header_offset, header_size, ZIO_CHECKSUM_LABEL, + NULL, flags); + + int error; + if ((error = zio_wait(rio)) != 0) { + zfs_dbgmsg("Error %d reading anyraid header %d on vdev %s", + error, header, cvd->vdev_path); + abd_free(header_abd); + return (error); + } + + char *header_buf = abd_borrow_buf(header_abd, header_size); + nvlist_t *header_nvl; + error = nvlist_unpack(header_buf, header_size, &header_nvl, + KM_SLEEP); + if (error != 0) { + zfs_dbgmsg("Error %d unpacking anyraid header %d on vdev %s", + error, header, cvd->vdev_path); + abd_return_buf(header_abd, header_buf, header_size); + abd_free(header_abd); + return (error); + } + out_header->ah_abd = header_abd; + out_header->ah_buf = header_buf; + out_header->ah_nvl = header_nvl; + + return (0); +} + +static void +free_header(anyraid_header_t *header, uint64_t header_size) { + fnvlist_free(header->ah_nvl); + abd_return_buf(header->ah_abd, header->ah_buf, header_size); + abd_free(header->ah_abd); +} + +/* + * This function is non-static for ZDB, and shouldn't be used for anything else. 
+ * + * Iterate over all the copies of the map for the given child vdev and select + * the best one. + */ +int +vdev_anyraid_pick_best_mapping(vdev_t *cvd, uint64_t *out_txg, + anyraid_header_t *out_header, int *out_mapping) +{ + spa_t *spa = cvd->vdev_spa; + uint64_t ashift = cvd->vdev_ashift; + int error = 0; + uint64_t header_size = VDEV_ANYRAID_MAP_HEADER_SIZE(ashift); + + int best_mapping = -1; + uint64_t best_txg = 0; + anyraid_header_t best_header = {0}; + boolean_t checkpoint_rb = spa_importing_checkpoint(spa); + + for (int i = 0; i < VDEV_ANYRAID_MAP_COPIES; i++) { + anyraid_header_t header; + error = vdev_anyraid_open_header(cvd, i, &header); + + if (error) + continue; + + nvlist_t *hnvl = header.ah_nvl; + uint16_t version; + if ((error = nvlist_lookup_uint16(hnvl, + VDEV_ANYRAID_HEADER_VERSION, &version)) != 0) { + free_header(&header, header_size); + zfs_dbgmsg("Anyraid header %d on vdev %s: missing " + "version", i, cvd->vdev_path); + continue; + } + if (version != 0) { + free_header(&header, header_size); + error = SET_ERROR(ENOTSUP); + zfs_dbgmsg("Anyraid header %d on vdev %s: invalid " + "version", i, cvd->vdev_path); + continue; + } + + uint64_t pool_guid = 0; + if (nvlist_lookup_uint64(hnvl, VDEV_ANYRAID_HEADER_GUID, + &pool_guid) != 0 || pool_guid != spa_guid(spa)) { + free_header(&header, header_size); + error = SET_ERROR(EINVAL); + zfs_dbgmsg("Anyraid header %d on vdev %s: guid " + "mismatch: %llu %llu", i, cvd->vdev_path, + (u_longlong_t)pool_guid, + (u_longlong_t)spa_guid(spa)); + continue; + } + + uint64_t written_txg; + if (nvlist_lookup_uint64(hnvl, VDEV_ANYRAID_HEADER_TXG, + &written_txg) != 0) { + free_header(&header, header_size); + error = SET_ERROR(EINVAL); + zfs_dbgmsg("Anyraid header %d on vdev %s: no txg", + i, cvd->vdev_path); + continue; + } + /* + * If we're reopening, the current txg hasn't been synced out + * yet; look for one txg earlier. + */ + uint64_t min_txg = spa_current_txg(spa) - + (cvd->vdev_parent->vdev_reopening ? 
1 : 0); + if ((written_txg < min_txg && !checkpoint_rb) || + written_txg > spa_load_max_txg(spa)) { + free_header(&header, header_size); + error = SET_ERROR(EINVAL); + zfs_dbgmsg("Anyraid header %d on vdev %s: txg %llu out " + "of bounds (%llu, %llu)", i, cvd->vdev_path, + (u_longlong_t)written_txg, + (u_longlong_t)min_txg, + (u_longlong_t)spa_load_max_txg(spa)); + continue; + } + if (written_txg > best_txg) { + best_txg = written_txg; + best_mapping = i; + if (best_header.ah_nvl) + free_header(&best_header, header_size); + + best_header = header; + } else { + free_header(&header, header_size); + } + } + + if (best_txg != 0) { + *out_txg = best_txg; + *out_mapping = best_mapping; + *out_header = best_header; + return (0); + } + ASSERT(error); + return (error); +} + +static int +anyraid_open_existing(vdev_t *vd, uint64_t child, uint16_t **child_capacities) +{ + vdev_anyraid_t *var = vd->vdev_tsd; + vdev_t *cvd = vd->vdev_child[child]; + uint64_t ashift = cvd->vdev_ashift; + spa_t *spa = vd->vdev_spa; + int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL | + ZIO_FLAG_SPECULATIVE; + uint64_t header_size = VDEV_ANYRAID_MAP_HEADER_SIZE(ashift); + boolean_t checkpoint_rb = spa_importing_checkpoint(spa); + + anyraid_header_t header; + int mapping; + uint64_t txg; + int error = vdev_anyraid_pick_best_mapping(cvd, &txg, &header, + &mapping); + if (error) + return (error); + + uint8_t disk_id; + if (nvlist_lookup_uint8(header.ah_nvl, VDEV_ANYRAID_HEADER_DISK, + &disk_id) != 0) { + zfs_dbgmsg("Error opening anyraid vdev %llu: No disk ID", + (u_longlong_t)vd->vdev_id); + free_header(&header, header_size); + return (SET_ERROR(EINVAL)); + } + + uint64_t tile_size; + if (nvlist_lookup_uint64(header.ah_nvl, VDEV_ANYRAID_HEADER_TILE_SIZE, + &tile_size) != 0) { + zfs_dbgmsg("Error opening anyraid vdev %llu: No tile size", + (u_longlong_t)vd->vdev_id); + free_header(&header, header_size); + return (SET_ERROR(EINVAL)); + } + + uint32_t map_length; + if (nvlist_lookup_uint32(header.ah_nvl, VDEV_ANYRAID_HEADER_LENGTH, + &map_length) != 0) { + zfs_dbgmsg("Error opening anyraid vdev %llu: No map length", + (u_longlong_t)vd->vdev_id); + free_header(&header, header_size); + return (SET_ERROR(EINVAL)); + } + + uint16_t *caps = NULL; + uint_t count; + if (nvlist_lookup_uint16_array(header.ah_nvl, + VDEV_ANYRAID_HEADER_DISK_SIZES, &caps, &count) != 0) { + zfs_dbgmsg("Error opening anyraid vdev %llu: No child sizes", + (u_longlong_t)vd->vdev_id); + free_header(&header, header_size); + return (SET_ERROR(EINVAL)); + } + if (count != vd->vdev_children) { + zfs_dbgmsg("Error opening anyraid vdev %llu: Incorrect child " + "count %u vs %u", (u_longlong_t)vd->vdev_id, count, + (uint_t)vd->vdev_children); + free_header(&header, header_size); + return (SET_ERROR(EINVAL)); + } + + *child_capacities = kmem_alloc(sizeof (*caps) * count, KM_SLEEP); + memcpy(*child_capacities, caps, sizeof (*caps) * count); + if (vd->vdev_reopening) { + free_header(&header, header_size); + return (0); + } + + var->vd_checkpoint_tile = UINT32_MAX; + (void) nvlist_lookup_uint32(header.ah_nvl, + VDEV_ANYRAID_HEADER_CHECKPOINT, &var->vd_checkpoint_tile); + + /* + * Because the tile map is 64 MiB and the maximum IO size is 16MiB, + * we may need to issue up to 4 reads to read in the whole thing. + * Similarly, when processing the mapping, we need to iterate across + * the 4 separate buffers. 
+	 */
+	zio_t *rio = zio_root(spa, NULL, NULL, flags);
+	abd_t *map_abds[VDEV_ANYRAID_MAP_COPIES] = {0};
+	uint64_t header_offset = VDEV_LABEL_START_SIZE +
+	    mapping * VDEV_ANYRAID_SINGLE_MAP_SIZE(ashift);
+	uint64_t map_offset = header_offset + header_size;
+	int i;
+	for (i = 0; i <= (map_length / SPA_MAXBLOCKSIZE); i++) {
+		zio_eck_t *cksum = (zio_eck_t *)
+		    &header.ah_buf[VDEV_ANYRAID_NVL_BYTES(ashift) +
+		    i * sizeof (*cksum)];
+		zio_t *nio = zio_null(rio, spa, cvd, NULL, &map_abds[i], flags);
+		child_read(nio, cvd, map_offset + i * SPA_MAXBLOCKSIZE,
+		    SPA_MAXBLOCKSIZE, ZIO_CHECKSUM_ANYRAID_MAP, cksum, flags);
+		zio_nowait(nio);
+	}
+	i--;
+
+	if ((error = zio_wait(rio))) {
+		for (; i >= 0; i--)
+			abd_free(map_abds[i]);
+		free_header(&header, header_size);
+		zfs_dbgmsg("Error opening anyraid vdev %llu: map read error %d",
+		    (u_longlong_t)vd->vdev_id, error);
+		return (error);
+	}
+	free_header(&header, header_size);
+
+	uint32_t map = -1, cur_tile = 0;
+	/*
+	 * For now, all entries are the size of a uint32_t. If that
+	 * ever changes, the logic here needs to be altered to work for
+	 * adaptive sizes, including entries split across 16MiB boundaries.
+	 */
+	uint32_t size = sizeof (anyraid_map_loc_entry_t);
+	uint8_t *map_buf = NULL;
+	uint8_t pat_cnt = 0;
+	anyraid_tile_t *ar = NULL;
+	for (uint32_t off = 0; off < map_length; off += size) {
+		if (checkpoint_rb && cur_tile > var->vd_checkpoint_tile &&
+		    pat_cnt == 0)
+			break;
+
+		int next_map = off / SPA_MAXBLOCKSIZE;
+		if (map != next_map) {
+			// switch maps
+			if (map != -1) {
+				abd_return_buf(map_abds[map], map_buf,
+				    SPA_MAXBLOCKSIZE);
+			}
+			map_buf = abd_borrow_buf(map_abds[next_map],
+			    SPA_MAXBLOCKSIZE);
+			map = next_map;
+
+#ifdef _ZFS_BIG_ENDIAN
+			uint32_t length = map_length -
+			    next_map * SPA_MAXBLOCKSIZE;
+			byteswap_uint32_array(map_buf, length <
+			    SPA_MAXBLOCKSIZE ? length : SPA_MAXBLOCKSIZE);
+#endif
+		}
+		anyraid_map_entry_t *entry =
+		    (anyraid_map_entry_t *)(map_buf + (off % SPA_MAXBLOCKSIZE));
+		uint8_t type = entry->ame_u.ame_amle.amle_type;
+		switch (type) {
+		case AMET_SKIP: {
+			anyraid_map_skip_entry_t *amse =
+			    &entry->ame_u.ame_amse;
+			ASSERT0(pat_cnt);
+			cur_tile += amse_get_skip_count(amse);
+			break;
+		}
+		case AMET_LOC: {
+			anyraid_map_loc_entry_t *amle =
+			    &entry->ame_u.ame_amle;
+			create_tile_entry(var, amle, &pat_cnt, &ar,
+			    &cur_tile);
+			break;
+		}
+		default:
+			PANIC("Invalid entry type %d", type);
+		}
+	}
+	if (map_buf)
+		abd_return_buf(map_abds[map], map_buf, SPA_MAXBLOCKSIZE);
+
+	var->vd_tile_size = tile_size;
+
+	for (; i >= 0; i--)
+		abd_free(map_abds[i]);
+
+	/*
+	 * Now that we have the tile map read in, we have to reopen the
+	 * children to properly set and handle the min_asize.
+	 */
+	for (uint64_t c = 0; c < vd->vdev_children; c++) {
+		vdev_t *cvd = vd->vdev_child[c];
+		vdev_reopen(cvd);
+	}
+
+	int lasterror = 0;
+	int numerrors = 0;
+	for (int c = 0; c < vd->vdev_children; c++) {
+		vdev_t *cvd = vd->vdev_child[c];
+
+		if (cvd->vdev_open_error != 0) {
+			lasterror = cvd->vdev_open_error;
+			numerrors++;
+			continue;
+		}
+	}
+
+	if (numerrors > var->vd_nparity) {
+		vd->vdev_stat.vs_aux = VDEV_AUX_NO_REPLICAS;
+		return (lasterror);
+	}
+
+	return (0);
+}
+
+/*
+ * When creating a new anyraid vdev, this function calculates the tile size
+ * to use. We take (by default) 1/64th of the size of the smallest disk or 16
+ * GiB, whichever is larger.
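+ *
+ * As a rough worked example with the default tunables: an 8 TiB smallest
+ * disk yields 8 TiB / 64 = 128 GiB tiles, so that disk holds roughly 64
+ * tiles and a single larger disk can contribute up to 2^16 tiles (8 PiB)
+ * before the per-disk tile limit is reached. A 100 GiB smallest disk would
+ * instead hit the 16 GiB floor and hold roughly 6 tiles.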
+ */ +static int +anyraid_calculate_size(vdev_t *vd) +{ + vdev_anyraid_t *var = vd->vdev_tsd; + + uint64_t smallest_disk_size = UINT64_MAX; + for (int c = 0; c < vd->vdev_children; c++) { + vdev_t *cvd = vd->vdev_child[c]; + smallest_disk_size = MIN(smallest_disk_size, cvd->vdev_asize); + } + + uint64_t disk_shift = anyraid_disk_shift; + uint64_t min_size = zfs_anyraid_min_tile_size; + if (smallest_disk_size < 1 << disk_shift || + smallest_disk_size < min_size) { + return (SET_ERROR(ENOLCK)); + } + + + ASSERT3U(smallest_disk_size, !=, UINT64_MAX); + uint64_t tile_size = smallest_disk_size >> disk_shift; + tile_size = MAX(tile_size, min_size); + var->vd_tile_size = 1ULL << (highbit64(tile_size - 1)); + + /* + * Later, we're going to cap the metaslab size at the tile + * size, so we need a tile to hold at least enough to store a + * max-size block, or we'll assert in that code. + */ + if (var->vd_tile_size <= SPA_MAXBLOCKSIZE) + return (SET_ERROR(ENOSPC)); + return (0); +} + +struct tile_count { + avl_node_t node; + int disk; + int remaining; +}; + +static int +rc_compar(const void *a, const void *b) +{ + const struct tile_count *ra = a; + const struct tile_count *rb = b; + + int cmp = TREE_CMP(rb->remaining, ra->remaining); + if (cmp != 0) + return (cmp); + return (TREE_CMP(rb->disk, ra->disk)); +} + +/* + * I think the only way to calculate the asize for anyraid devices is to + * actually run the allocation algorithm and see what we end up with. It's a + * variant of the bin-packing problem, which is NP-hard. Thankfully + * a first-fit descending algorithm seems to give optimal results for this + * variant. + */ +static uint64_t +calculate_asize(vdev_t *vd, uint64_t *num_tiles) +{ + vdev_anyraid_t *var = vd->vdev_tsd; + + if (var->vd_nparity == 0) { + uint64_t count = 0; + for (int c = 0; c < vd->vdev_children; c++) { + count += num_tiles[c]; + } + return (count * var->vd_tile_size); + } + + /* + * Sort the disks by the number of additional tiles they can store. + */ + avl_tree_t t; + avl_create(&t, rc_compar, sizeof (struct tile_count), + offsetof(struct tile_count, node)); + for (int c = 0; c < vd->vdev_children; c++) { + if (num_tiles[c] == 0) { + ASSERT(vd->vdev_child[c]->vdev_open_error); + continue; + } + struct tile_count *rc = kmem_alloc(sizeof (*rc), KM_SLEEP); + rc->disk = c; + rc->remaining = num_tiles[c] - + var->vd_children[c]->van_next_offset; + avl_add(&t, rc); + } + + uint32_t map_width = var->vd_nparity + 1; + uint64_t count = avl_numnodes(&var->vd_tile_map); + struct tile_count **cur = kmem_alloc(sizeof (*cur) * map_width, + KM_SLEEP); + for (;;) { + /* Grab the nparity + 1 children with the most free capacity */ + for (int c = 0; c < map_width; c++) { + struct tile_count *rc = avl_first(&t); + ASSERT(rc); + cur[c] = rc; + avl_remove(&t, rc); + } + struct tile_count *rc = cur[map_width - 1]; + struct tile_count *next = avl_first(&t); + uint64_t next_rem = next == NULL ? 0 : next->remaining; + ASSERT3U(next_rem, <=, rc->remaining); + /* If one of the top N + 1 has no capacity left, we're done */ + if (rc->remaining == 0) + break; + + /* + * This is a performance optimization; if the child with the + * lowest free capacity of the ones we've selected has N more + * capacity than the next child, the next N iterations would + * all select the same children. So to save time, we add N + * tiles right now and reduce our iteration count. 
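+		 *
+		 * For example, with parity 1 (two copies per tile) and
+		 * remaining capacities {10, 7, 4}, the first pass selects
+		 * {10, 7}, sees next_rem = 4, and books 7 - 4 = 3 tiles at
+		 * once, leaving {7, 4, 4}. Iterating to completion places 10
+		 * tiles, matching the floor(21 / 2) upper bound.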
+
+static int
+vdev_anyraid_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize,
+    uint64_t *logical_ashift, uint64_t *physical_ashift)
+{
+	vdev_anyraid_t *var = vd->vdev_tsd;
+	int lasterror = 0;
+	int numerrors = 0;
+
+	vdev_open_children(vd);
+
+	for (int c = 0; c < vd->vdev_children; c++) {
+		vdev_t *cvd = vd->vdev_child[c];
+
+		if (cvd->vdev_open_error != 0) {
+			lasterror = cvd->vdev_open_error;
+			numerrors++;
+			continue;
+		}
+	}
+
+	/*
+	 * If we have more faulted disks than parity, we can't open the device.
+	 */
+	if (numerrors > var->vd_nparity) {
+		vd->vdev_stat.vs_aux = VDEV_AUX_NO_REPLICAS;
+		return (lasterror);
+	}
+
+	uint16_t *child_capacities = NULL;
+	if (vd->vdev_reopening) {
+		child_capacities = kmem_alloc(sizeof (*child_capacities) *
+		    vd->vdev_children, KM_SLEEP);
+		for (uint64_t c = 0; c < vd->vdev_children; c++) {
+			child_capacities[c] = var->vd_children[c]->van_capacity;
+		}
+	} else if (spa_load_state(vd->vdev_spa) != SPA_LOAD_CREATE &&
+	    spa_load_state(vd->vdev_spa) != SPA_LOAD_ERROR &&
+	    spa_load_state(vd->vdev_spa) != SPA_LOAD_NONE) {
+		for (uint64_t c = 0; c < vd->vdev_children; c++) {
+			vdev_t *cvd = vd->vdev_child[c];
+			if (cvd->vdev_open_error != 0)
+				continue;
+			if ((lasterror = anyraid_open_existing(vd, c,
+			    &child_capacities)) == 0)
+				break;
+		}
+		if (lasterror)
+			return (lasterror);
+	} else if ((lasterror = anyraid_calculate_size(vd))) {
+		return (lasterror);
+	}
+
+	/*
+	 * Calculate the number of tiles each child could fit, then use that
+	 * to calculate the asize and min_asize.
+ */ + uint64_t *num_tiles = kmem_zalloc(vd->vdev_children * + sizeof (*num_tiles), KM_SLEEP); + for (int c = 0; c < vd->vdev_children; c++) { + vdev_t *cvd = vd->vdev_child[c]; + + uint64_t casize; + if (cvd->vdev_open_error == 0) { + vdev_set_min_asize(cvd); + casize = cvd->vdev_asize - + VDEV_ANYRAID_TOTAL_MAP_SIZE(cvd->vdev_ashift); + } else { + ASSERT(child_capacities); + casize = child_capacities[c] * var->vd_tile_size; + } + + num_tiles[c] = casize / var->vd_tile_size; + avl_remove(&var->vd_children_tree, var->vd_children[c]); + var->vd_children[c]->van_capacity = num_tiles[c]; + avl_add(&var->vd_children_tree, var->vd_children[c]); + } + *asize = calculate_asize(vd, num_tiles); + + for (int c = 0; c < vd->vdev_children; c++) { + vdev_t *cvd = vd->vdev_child[c]; + + uint64_t cmasize; + if (cvd->vdev_open_error == 0) { + cmasize = cvd->vdev_max_asize - + VDEV_ANYRAID_TOTAL_MAP_SIZE(cvd->vdev_ashift); + } else { + cmasize = child_capacities[c] * var->vd_tile_size; + } + + num_tiles[c] = cmasize / var->vd_tile_size; + } + *max_asize = calculate_asize(vd, num_tiles); + + if (child_capacities) { + kmem_free(child_capacities, sizeof (*child_capacities) * + vd->vdev_children); + } + for (int c = 0; c < vd->vdev_children; c++) { + vdev_t *cvd = vd->vdev_child[c]; + + if (cvd->vdev_open_error != 0) + continue; + + *logical_ashift = MAX(*logical_ashift, cvd->vdev_ashift); + *physical_ashift = vdev_best_ashift(*logical_ashift, + *physical_ashift, cvd->vdev_physical_ashift); + } + return (0); +} + +/* + * We cap the metaslab size at the tile size. This prevents us from having to + * split IOs across multiple tiles, which would be complex extra logic for + * little gain. + */ +static void +vdev_anyraid_metaslab_size(vdev_t *vd, uint64_t *shiftp) +{ + vdev_anyraid_t *var = vd->vdev_tsd; + *shiftp = MIN(*shiftp, highbit64(var->vd_tile_size) - 1); +} + +static void +vdev_anyraid_close(vdev_t *vd) +{ + vdev_anyraid_t *var = vd->vdev_tsd; + for (int c = 0; c < vd->vdev_children; c++) { + if (vd->vdev_child[c] != NULL) + vdev_close(vd->vdev_child[c]); + } + if (vd->vdev_reopening) + return; + anyraid_tile_t *tile = NULL; + void *cookie = NULL; + while ((tile = avl_destroy_nodes(&var->vd_tile_map, &cookie))) { + if (var->vd_nparity != 0) { + anyraid_tile_node_t *atn = NULL; + while ((atn = list_remove_head(&tile->at_list))) { + kmem_free(atn, sizeof (*atn)); + } + list_destroy(&tile->at_list); + } + kmem_free(tile, sizeof (*tile)); + } +} + +/* + * I/O related functions. + */ + +/* + * Configure the mirror_map and then hand the write off to the normal mirror + * logic. 
+ */ +static void +vdev_anyraid_mirror_start(zio_t *zio, anyraid_tile_t *tile) +{ + vdev_t *vd = zio->io_vd; + vdev_anyraid_t *var = vd->vdev_tsd; + mirror_map_t *mm = vdev_mirror_map_alloc(var->vd_nparity + 1, B_FALSE, + B_FALSE); + uint64_t rsize = var->vd_tile_size; + + anyraid_tile_node_t *arn = list_head(&tile->at_list); + for (int c = 0; c < mm->mm_children; c++) { + ASSERT(arn); + mirror_child_t *mc = &mm->mm_child[c]; + mc->mc_vd = vd->vdev_child[arn->atn_disk]; + mc->mc_offset = VDEV_ANYRAID_TOTAL_MAP_SIZE(vd->vdev_ashift) + + arn->atn_offset * rsize + zio->io_offset % rsize; + ASSERT3U(mc->mc_offset, <, mc->mc_vd->vdev_psize - + VDEV_LABEL_END_SIZE); + mm->mm_rebuilding = mc->mc_rebuilding = B_FALSE; + arn = list_next(&tile->at_list, arn); + } + ASSERT(arn == NULL); + + zio->io_vsd = mm; + zio->io_vsd_ops = &vdev_mirror_vsd_ops; + + vdev_mirror_io_start_impl(zio, mm); +} + +typedef struct anyraid_map { + abd_t *am_abd; +} anyraid_map_t; + +static void +vdev_anyraid_map_free_vsd(zio_t *zio) +{ + anyraid_map_t *mm = zio->io_vsd; + abd_free(mm->am_abd); + mm->am_abd = NULL; + kmem_free(mm, sizeof (*mm)); +} + +const zio_vsd_ops_t vdev_anyraid_vsd_ops = { + .vsd_free = vdev_anyraid_map_free_vsd, +}; + +static void +vdev_anyraid_child_done(zio_t *zio) +{ + zio_t *pio = zio->io_private; + pio->io_error = zio_worst_error(pio->io_error, zio->io_error); +} + +static void +vdev_anyraid_io_start(zio_t *zio) +{ + vdev_t *vd = zio->io_vd; + vdev_anyraid_t *var = vd->vdev_tsd; + uint64_t rsize = var->vd_tile_size; + + uint64_t start_tile_id = zio->io_offset / rsize; + anyraid_tile_t search; + search.at_tile_id = start_tile_id; + avl_index_t where; + rw_enter(&var->vd_lock, RW_READER); + anyraid_tile_t *tile = avl_find(&var->vd_tile_map, &search, + &where); + + /* + * If we're doing an I/O somewhere that hasn't been allocated yet, we + * may need to allocate a new tile. Upgrade to a write lock so we can + * safely modify the data structure, and then check if someone else + * beat us to it. 
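The comment above describes the classic double-checked locking dance, restated here as a self-contained pthreads sketch with the tile map reduced to a flag array; the real code does the same thing with a krwlock and an AVL tree:

    #include <pthread.h>
    #include <stdint.h>
    #include <stdio.h>

    #define NTILES 16

    static pthread_rwlock_t lock = PTHREAD_RWLOCK_INITIALIZER;
    static int allocated[NTILES];	/* toy stand-in for the AVL tile map */

    /*
     * Same shape as the I/O path below: rwlocks can't be upgraded in
     * place, so on a miss we drop the read lock, take the write lock, and
     * look up again, because another writer may have created the tile in
     * the window between the two acquisitions.
     */
    static void
    find_or_create(uint64_t id)
    {
    	pthread_rwlock_rdlock(&lock);
    	if (!allocated[id]) {
    		pthread_rwlock_unlock(&lock);
    		pthread_rwlock_wrlock(&lock);
    		if (!allocated[id]) {	/* re-check under the write lock */
    			allocated[id] = 1;
    			printf("allocated tile %llu\n",
    			    (unsigned long long)id);
    		}
    	}
    	pthread_rwlock_unlock(&lock);
    }

    int
    main(void)
    {
    	find_or_create(3);
    	find_or_create(3);	/* second call hits under the read lock */
    	return (0);
    }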
+ */ + if (tile == NULL) { + rw_exit(&var->vd_lock); + rw_enter(&var->vd_lock, RW_WRITER); + tile = avl_find(&var->vd_tile_map, &search, &where); + } + if (tile == NULL) { + ASSERT3U(zio->io_type, ==, ZIO_TYPE_WRITE); + zfs_dbgmsg("Allocating tile %llu for zio %px", + (u_longlong_t)start_tile_id, zio); + tile = kmem_alloc(sizeof (*tile), KM_SLEEP); + tile->at_tile_id = start_tile_id; + list_create(&tile->at_list, sizeof (anyraid_tile_node_t), + offsetof(anyraid_tile_node_t, atn_node)); + + uint_t width = var->vd_nparity + 1; + vdev_anyraid_node_t **vans = kmem_alloc(sizeof (*vans) * width, + KM_SLEEP); + for (int i = 0; i < width; i++) { + vans[i] = avl_first(&var->vd_children_tree); + avl_remove(&var->vd_children_tree, vans[i]); + + anyraid_tile_node_t *arn = + kmem_alloc(sizeof (*arn), KM_SLEEP); + arn->atn_disk = vans[i]->van_id; + arn->atn_offset = + vans[i]->van_next_offset++; + list_insert_tail(&tile->at_list, arn); + } + for (int i = 0; i < width; i++) + avl_add(&var->vd_children_tree, vans[i]); + + kmem_free(vans, sizeof (*vans) * width); + avl_insert(&var->vd_tile_map, tile, where); + } + rw_exit(&var->vd_lock); + + ASSERT3U(zio->io_offset % rsize + zio->io_size, <=, + var->vd_tile_size); + + if (var->vd_nparity > 0) { + vdev_anyraid_mirror_start(zio, tile); + zio_execute(zio); + return; + } + + anyraid_tile_node_t *arn = list_head(&tile->at_list); + vdev_t *cvd = vd->vdev_child[arn->atn_disk]; + uint64_t child_offset = arn->atn_offset * rsize + + zio->io_offset % rsize; + child_offset += VDEV_ANYRAID_TOTAL_MAP_SIZE(vd->vdev_ashift); + + anyraid_map_t *mm = kmem_alloc(sizeof (*mm), KM_SLEEP); + mm->am_abd = abd_get_offset(zio->io_abd, 0); + zio->io_vsd = mm; + zio->io_vsd_ops = &vdev_anyraid_vsd_ops; + + zio_t *cio = zio_vdev_child_io(zio, NULL, cvd, child_offset, + mm->am_abd, zio->io_size, zio->io_type, zio->io_priority, 0, + vdev_anyraid_child_done, zio); + zio_nowait(cio); + + zio_execute(zio); +} + +static void +vdev_anyraid_io_done(zio_t *zio) +{ + vdev_t *vd = zio->io_vd; + vdev_anyraid_t *var = vd->vdev_tsd; + + if (var->vd_nparity > 0) + vdev_mirror_io_done(zio); +} + +static void +vdev_anyraid_state_change(vdev_t *vd, int faulted, int degraded) +{ + vdev_anyraid_t *var = vd->vdev_tsd; + if (faulted > var->vd_nparity) { + vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, + VDEV_AUX_NO_REPLICAS); + } else if (degraded + faulted != 0) { + vdev_set_state(vd, B_FALSE, VDEV_STATE_DEGRADED, VDEV_AUX_NONE); + } else { + vdev_set_state(vd, B_FALSE, VDEV_STATE_HEALTHY, VDEV_AUX_NONE); + } +} + +/* + * Determine if any portion of the provided block resides on a child vdev + * with a dirty DTL and therefore needs to be resilvered. The function + * assumes that at least one DTL is dirty which implies that full stripe + * width blocks must be resilvered. 
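One point worth spelling out for the resilver check: anyraid has no stripes, so a block lives entirely within one tile, and only the children holding a copy of that tile matter. A toy restatement with DTLs reduced to booleans (all names here are invented for the example):

    #include <stdbool.h>
    #include <stdio.h>

    /* Toy stand-ins: copies of one tile, per-child "DTL dirty" flags. */
    struct copy { int disk; };

    static bool
    need_resilver(const struct copy *copies, int ncopies,
        const bool *dtl_dirty)
    {
    	/*
    	 * A block occupies exactly one tile, so it needs repair iff any
    	 * child storing a copy of that tile has a dirty DTL.
    	 */
    	for (int c = 0; c < ncopies; c++) {
    		if (dtl_dirty[copies[c].disk])
    			return (true);
    	}
    	return (false);
    }

    int
    main(void)
    {
    	struct copy copies[2] = { {0}, {3} };
    	bool dtl_dirty[4] = { false, true, false, false };

    	/* Disk 1 is dirty but holds no copy of this tile: no resilver. */
    	printf("%d\n", need_resilver(copies, 2, dtl_dirty));
    	dtl_dirty[3] = true;	/* now a copy-holder is dirty */
    	printf("%d\n", need_resilver(copies, 2, dtl_dirty));
    	return (0);
    }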
+ */ +static boolean_t +vdev_anyraid_need_resilver(vdev_t *vd, const dva_t *dva, size_t psize, + uint64_t phys_birth) +{ + (void) psize; + vdev_anyraid_t *var = vd->vdev_tsd; + if (!vdev_dtl_contains(vd, DTL_PARTIAL, phys_birth, 1)) + return (B_FALSE); + + uint64_t start_tile_id = DVA_GET_OFFSET(dva) / var->vd_tile_size; + anyraid_tile_t search; + search.at_tile_id = start_tile_id; + avl_index_t where; + rw_enter(&var->vd_lock, RW_READER); + anyraid_tile_t *tile = avl_find(&var->vd_tile_map, &search, + &where); + rw_exit(&var->vd_lock); + ASSERT(tile); + + for (anyraid_tile_node_t *arn = list_head(&tile->at_list); + arn != NULL; arn = list_next(&tile->at_list, arn)) { + vdev_t *cvd = vd->vdev_child[arn->atn_disk]; + + if (!vdev_dtl_empty(cvd, DTL_PARTIAL)) + return (B_TRUE); + } + + return (B_FALSE); +} + +/* + * Right now, we don't translate anything beyond the end of the allocated + * ranges for the target leaf vdev. This means that trim and initialize won't + * affect those areas on anyraid devices. Given the target use case, this is + * not a significant concern, but a rework of the xlate logic could enable this + * in the future. + */ +static void +vdev_anyraid_xlate(vdev_t *cvd, const zfs_range_seg64_t *logical_rs, + zfs_range_seg64_t *physical_rs, zfs_range_seg64_t *remain_rs) +{ + vdev_t *anyraidvd = cvd->vdev_parent; + ASSERT3P(anyraidvd->vdev_ops, ==, &vdev_anyraid_ops); + vdev_anyraid_t *var = anyraidvd->vdev_tsd; + uint64_t rsize = var->vd_tile_size; + + uint64_t start_tile_id = logical_rs->rs_start / rsize; + ASSERT3U(start_tile_id, ==, (logical_rs->rs_end - 1) / rsize); + anyraid_tile_t search; + search.at_tile_id = start_tile_id; + avl_index_t where; + rw_enter(&var->vd_lock, RW_READER); + anyraid_tile_t *tile = avl_find(&var->vd_tile_map, &search, + &where); + rw_exit(&var->vd_lock); + // This tile doesn't exist yet + if (tile == NULL) { + physical_rs->rs_start = physical_rs->rs_end = 0; + return; + } + anyraid_tile_node_t *arn = list_head(&tile->at_list); + for (; arn != NULL; arn = list_next(&tile->at_list, arn)) + if (anyraidvd->vdev_child[arn->atn_disk] == cvd) + break; + // The tile exists, but isn't stored on this child + if (arn == NULL) { + physical_rs->rs_start = physical_rs->rs_end = 0; + return; + } + + uint64_t child_offset = arn->atn_offset * rsize + + logical_rs->rs_start % rsize; + child_offset += VDEV_ANYRAID_TOTAL_MAP_SIZE(anyraidvd->vdev_ashift); + uint64_t size = logical_rs->rs_end - logical_rs->rs_start; + + physical_rs->rs_start = child_offset; + physical_rs->rs_end = child_offset + size; + remain_rs->rs_start = 0; + remain_rs->rs_end = 0; +} + +static uint64_t +vdev_anyraid_nparity(vdev_t *vd) +{ + vdev_anyraid_t *var = vd->vdev_tsd; + return (var->vd_nparity); +} + +static uint64_t +vdev_anyraid_ndisks(vdev_t *vd) +{ + return (vd->vdev_children); +} + +/* + * Functions related to syncing out the tile map each TXG. 
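Before the helpers, a note on why the sync path below can be crash-safe: it rotates the on-disk copy it overwrites by txg, so an interrupted sync can clobber at most one of the VDEV_ANYRAID_MAP_COPIES self-checksummed copies, and import can presumably fall back to an older intact one. A trivial sketch of the rotation, assuming three copies (the actual constant is defined outside this hunk):

    #include <stdint.h>
    #include <stdio.h>

    /* Assumed stand-in for VDEV_ANYRAID_MAP_COPIES. */
    #define COPIES 3

    int
    main(void)
    {
    	/* Consecutive txgs overwrite different copies in turn. */
    	for (uint64_t txg = 40; txg < 46; txg++)
    		printf("txg %llu -> map copy %llu\n",
    		    (unsigned long long)txg,
    		    (unsigned long long)(txg % COPIES));
    	return (0);
    }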
+ */
+static boolean_t
+map_write_loc_entry(anyraid_tile_node_t *arn, void *buf, uint32_t *offset)
+{
+	anyraid_map_loc_entry_t *entry = (void *)((char *)buf + *offset);
+	entry->amle_type = AMET_LOC;
+	entry->amle_disk = arn->atn_disk;
+	entry->amle_offset = arn->atn_offset;
+	*offset += sizeof (*entry);
+	return (*offset == SPA_MAXBLOCKSIZE);
+}
+
+static boolean_t
+map_write_skip_entry(uint32_t tile, void *buf, uint32_t *offset,
+    uint32_t prev_id)
+{
+	anyraid_map_skip_entry_t *entry = (void *)((char *)buf + *offset);
+	amse_set_type(entry);
+	amse_set_skip_count(entry, tile - prev_id - 1);
+	*offset += sizeof (*entry);
+	return (*offset == SPA_MAXBLOCKSIZE);
+}
+
+static void
+anyraid_map_write_done(zio_t *zio)
+{
+	abd_free(zio->io_abd);
+}
+
+static void
+map_write_issue(zio_t *zio, vdev_t *vd, uint64_t base_offset,
+    uint8_t idx, uint32_t length, abd_t *abd, zio_eck_t *cksum_out,
+    int flags)
+{
+#ifdef _ZFS_BIG_ENDIAN
+	void *buf = abd_borrow_buf(abd, SPA_MAXBLOCKSIZE);
+	byteswap_uint32_array(buf, length);
+	abd_return_buf_copy(abd, buf, SPA_MAXBLOCKSIZE);
+#else
+	(void) length;
+#endif
+
+	zio_nowait(zio_write_phys(zio, vd, base_offset +
+	    idx * SPA_MAXBLOCKSIZE +
+	    VDEV_ANYRAID_MAP_HEADER_SIZE(vd->vdev_ashift), SPA_MAXBLOCKSIZE,
+	    abd, ZIO_CHECKSUM_ANYRAID_MAP, anyraid_map_write_done, cksum_out,
+	    ZIO_PRIORITY_SYNC_WRITE, flags, B_FALSE));
+}
+
+static void
+vdev_anyraid_write_map_done(zio_t *zio)
+{
+	uint64_t *good_writes = zio->io_private;
+
+	if (zio->io_error == 0 && good_writes != NULL)
+		atomic_inc_64(good_writes);
+}
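A compact model of what the two entry writers above produce when driven by the sync loop below: one loc record per stored copy of each allocated tile, with gaps in the tile-id space compressed into skip records. The record encoding here is invented for the example; the real formats are the anyraid_map_*_entry_t definitions:

    #include <stdint.h>
    #include <stdio.h>

    enum { SKIP = 0, LOC = 1 };

    static void
    emit(uint32_t type, uint32_t payload)
    {
    	printf("%s %u\n", type == SKIP ? "skip" : "loc", payload);
    }

    int
    main(void)
    {
    	/* Allocated tile ids, as they come out of the AVL walk. */
    	uint32_t tiles[] = { 0, 1, 5, 6, 9 };
    	uint32_t prev = UINT32_MAX;

    	for (int i = 0; i < 5; i++) {
    		uint32_t cur = tiles[i];
    		/* A gap before the first tile, or between two tiles. */
    		if ((prev == UINT32_MAX && cur != 0) ||
    		    (prev != UINT32_MAX && cur > prev + 1))
    			emit(SKIP, cur - (prev == UINT32_MAX ? 0 : prev + 1));
    		emit(LOC, cur);	/* one per copy holding the tile */
    		prev = cur;
    	}
    	return (0);
    }

This prints "loc 0, loc 1, skip 3, loc 5, loc 6, skip 2, loc 9"; with parity, each loc line repeats once per copy, as the real loop walks every sub-tile node.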
+
+void
+vdev_anyraid_write_map_sync(vdev_t *vd, zio_t *pio, uint64_t txg,
+    uint64_t *good_writes, int flags, vdev_config_sync_status_t status)
+{
+	vdev_t *anyraidvd = vd->vdev_parent;
+	ASSERT3P(anyraidvd->vdev_ops, ==, &vdev_anyraid_ops);
+	spa_t *spa = vd->vdev_spa;
+	vdev_anyraid_t *var = anyraidvd->vdev_tsd;
+	uint32_t header_size = VDEV_ANYRAID_MAP_HEADER_SIZE(vd->vdev_ashift);
+	uint32_t full_size = VDEV_ANYRAID_SINGLE_MAP_SIZE(vd->vdev_ashift);
+	uint32_t nvl_bytes = VDEV_ANYRAID_NVL_BYTES(vd->vdev_ashift);
+	uint8_t update_target = txg % VDEV_ANYRAID_MAP_COPIES;
+	uint64_t base_offset = VDEV_LABEL_START_SIZE +
+	    update_target * full_size;
+
+	abd_t *header_abd = abd_alloc_linear(header_size, B_TRUE);
+	abd_zero(header_abd, header_size);
+	void *header_buf = abd_borrow_buf(header_abd, header_size);
+	zio_eck_t *cksums = (zio_eck_t *)&((char *)header_buf)[nvl_bytes];
+
+	abd_t *map_abd = abd_alloc_linear(SPA_MAXBLOCKSIZE, B_TRUE);
+	uint8_t written = 0;
+	void *buf = abd_borrow_buf(map_abd, SPA_MAXBLOCKSIZE);
+
+	rw_enter(&var->vd_lock, RW_READER);
+	anyraid_tile_t *cur = avl_first(&var->vd_tile_map);
+	anyraid_tile_node_t *curn = cur != NULL ?
+	    list_head(&cur->at_list) : NULL;
+	uint32_t buf_offset = 0, prev_id = UINT32_MAX;
+	zio_t *zio = zio_root(spa, NULL, NULL, flags);
+	/* Write out each sub-tile in turn */
+	while (cur) {
+		if (status == VDEV_CONFIG_REWINDING_CHECKPOINT &&
+		    cur->at_tile_id > var->vd_checkpoint_tile)
+			break;
+
+		anyraid_tile_t *next = AVL_NEXT(&var->vd_tile_map, cur);
+		IMPLY(prev_id != UINT32_MAX, cur->at_tile_id >= prev_id);
+		/*
+		 * Determine if we need to write a skip entry before the
+		 * current one.
+		 */
+		boolean_t skip =
+		    (prev_id == UINT32_MAX && cur->at_tile_id != 0) ||
+		    (prev_id != UINT32_MAX && cur->at_tile_id > prev_id + 1);
+		if ((skip && map_write_skip_entry(cur->at_tile_id, buf,
+		    &buf_offset, prev_id)) ||
+		    (!skip && map_write_loc_entry(curn, buf, &buf_offset))) {
+			// Let the final write handle it
+			if (next == NULL)
+				break;
+			abd_return_buf_copy(map_abd, buf, SPA_MAXBLOCKSIZE);
+			map_write_issue(zio, vd, base_offset, written,
+			    buf_offset, map_abd, &cksums[written], flags);
+
+			map_abd = abd_alloc_linear(SPA_MAXBLOCKSIZE, B_TRUE);
+			written++;
+			ASSERT3U(written, <,
+			    VDEV_ANYRAID_MAP_SIZE / SPA_MAXBLOCKSIZE);
+			buf = abd_borrow_buf(map_abd, SPA_MAXBLOCKSIZE);
+			buf_offset = 0;
+		}
+		prev_id = cur->at_tile_id;
+		/*
+		 * Advance the current sub-tile; if it moves us past the end
+		 * of the current list of sub-tiles, start the next tile.
+		 */
+		if (!skip) {
+			curn = list_next(&cur->at_list, curn);
+			if (curn == NULL) {
+				cur = next;
+				curn = cur != NULL ?
+				    list_head(&cur->at_list) : NULL;
+			}
+		}
+	}
+
+	if (status == VDEV_CONFIG_NO_CHECKPOINT ||
+	    status == VDEV_CONFIG_REWINDING_CHECKPOINT) {
+		var->vd_checkpoint_tile = UINT32_MAX;
+	} else if (status == VDEV_CONFIG_CREATING_CHECKPOINT) {
+		anyraid_tile_t *ar = avl_last(&var->vd_tile_map);
+		ASSERT(ar);
+		var->vd_checkpoint_tile = ar->at_tile_id;
+	}
+	rw_exit(&var->vd_lock);
+
+	abd_return_buf_copy(map_abd, buf, SPA_MAXBLOCKSIZE);
+	map_write_issue(zio, vd, base_offset, written, buf_offset, map_abd,
+	    &cksums[written], flags);
+
+	if (zio_wait(zio)) {
+		abd_return_buf(header_abd, header_buf, header_size);
+		abd_free(header_abd);
+		return;
+	}
+
+	// Populate the header
+	uint16_t *sizes = kmem_zalloc(sizeof (*sizes) *
+	    anyraidvd->vdev_children, KM_SLEEP);
+	uint64_t disk_id = 0;
+	for (uint64_t i = 0; i < anyraidvd->vdev_children; i++) {
+		if (anyraidvd->vdev_child[i] == vd)
+			disk_id = i;
+		sizes[i] = var->vd_children[i]->van_capacity;
+	}
+	ASSERT3U(disk_id, <, anyraidvd->vdev_children);
+	nvlist_t *header = fnvlist_alloc();
+	fnvlist_add_uint16(header, VDEV_ANYRAID_HEADER_VERSION, 0);
+	fnvlist_add_uint8(header, VDEV_ANYRAID_HEADER_DISK, disk_id);
+	fnvlist_add_uint64(header, VDEV_ANYRAID_HEADER_TXG, txg);
+	fnvlist_add_uint64(header, VDEV_ANYRAID_HEADER_GUID, spa_guid(spa));
+	fnvlist_add_uint64(header, VDEV_ANYRAID_HEADER_TILE_SIZE,
+	    var->vd_tile_size);
+	fnvlist_add_uint32(header, VDEV_ANYRAID_HEADER_LENGTH,
+	    written * SPA_MAXBLOCKSIZE + buf_offset);
+	fnvlist_add_uint16_array(header, VDEV_ANYRAID_HEADER_DISK_SIZES, sizes,
+	    anyraidvd->vdev_children);
+
+	if (var->vd_checkpoint_tile != UINT32_MAX) {
+		fnvlist_add_uint32(header, VDEV_ANYRAID_HEADER_CHECKPOINT,
+		    var->vd_checkpoint_tile);
+	}
+	size_t packed_size;
+	char *packed = NULL;
+	VERIFY0(nvlist_pack(header, &packed, &packed_size, NV_ENCODE_XDR,
+	    KM_SLEEP));
+	ASSERT3U(packed_size, <, nvl_bytes);
+	memcpy(header_buf, packed, packed_size);
+	fnvlist_pack_free(packed, packed_size);
+	fnvlist_free(header);
+	kmem_free(sizes, sizeof (*sizes) * anyraidvd->vdev_children);
+	abd_return_buf_copy(header_abd, header_buf, header_size);
+
+	// Write out the header
+	zio_t *header_zio = zio_write_phys(pio, vd, base_offset, header_size,
+	    header_abd, ZIO_CHECKSUM_LABEL, vdev_anyraid_write_map_done,
+	    good_writes, ZIO_PRIORITY_SYNC_WRITE, flags, B_FALSE);
+	zio_nowait(header_zio);
+	abd_free(header_abd);
+}
+
+static uint64_t
+vdev_anyraid_min_asize(vdev_t *pvd, vdev_t *cvd)
+{
+	ASSERT3P(pvd->vdev_ops, ==, &vdev_anyraid_ops);
+	ASSERT3U(spa_config_held(pvd->vdev_spa, SCL_ALL, RW_READER), !=, 0);
+	vdev_anyraid_t *var = pvd->vdev_tsd;
+	if (var->vd_tile_size == 0)
+		return (VDEV_ANYRAID_TOTAL_MAP_SIZE(cvd->vdev_ashift));
+
+	
rw_enter(&var->vd_lock, RW_READER); + uint64_t size = VDEV_ANYRAID_TOTAL_MAP_SIZE(cvd->vdev_ashift) + + var->vd_children[cvd->vdev_id]->van_next_offset * + var->vd_tile_size; + rw_exit(&var->vd_lock); + return (size); +} + +/* + * Used by the attach logic to determine if a device is big enough to be + * usefully attached. + */ +uint64_t +vdev_anyraid_min_newsize(vdev_t *vd, uint64_t ashift) +{ + vdev_anyraid_t *var = vd->vdev_tsd; + return (VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE + + VDEV_ANYRAID_TOTAL_MAP_SIZE(ashift) + var->vd_tile_size); +} + +void +vdev_anyraid_expand(vdev_t *tvd, vdev_t *newvd) +{ + vdev_anyraid_t *var = tvd->vdev_tsd; + uint64_t old_children = tvd->vdev_children - 1; + + ASSERT3U(spa_config_held(tvd->vdev_spa, SCL_ALL, RW_WRITER), ==, + SCL_ALL); + vdev_anyraid_node_t **nc = kmem_alloc(tvd->vdev_children * sizeof (*nc), + KM_SLEEP); + vdev_anyraid_node_t *newchild = kmem_alloc(sizeof (*newchild), + KM_SLEEP); + newchild->van_id = newvd->vdev_id; + newchild->van_next_offset = 0; + newchild->van_capacity = (newvd->vdev_asize - + VDEV_ANYRAID_TOTAL_MAP_SIZE(newvd->vdev_ashift)) / + var->vd_tile_size; + rw_enter(&var->vd_lock, RW_WRITER); + memcpy(nc, var->vd_children, old_children * sizeof (*nc)); + kmem_free(var->vd_children, old_children * sizeof (*nc)); + var->vd_children = nc; + var->vd_children[old_children] = newchild; + avl_add(&var->vd_children_tree, newchild); + rw_exit(&var->vd_lock); +} + +boolean_t +vdev_anyraid_mapped(vdev_t *vd, uint64_t offset) +{ + vdev_anyraid_t *var = vd->vdev_tsd; + anyraid_tile_t search; + search.at_tile_id = offset / var->vd_tile_size; + + rw_enter(&var->vd_lock, RW_READER); + anyraid_tile_t *tile = avl_find(&var->vd_tile_map, &search, NULL); + boolean_t result = tile == NULL; + rw_exit(&var->vd_lock); + + return (result); +} + +vdev_ops_t vdev_anyraid_ops = { + .vdev_op_init = vdev_anyraid_init, + .vdev_op_fini = vdev_anyraid_fini, + .vdev_op_open = vdev_anyraid_open, + .vdev_op_close = vdev_anyraid_close, + .vdev_op_psize_to_asize = vdev_default_asize, + .vdev_op_asize_to_psize = vdev_default_asize, + .vdev_op_min_asize = vdev_anyraid_min_asize, + .vdev_op_min_alloc = NULL, + .vdev_op_io_start = vdev_anyraid_io_start, + .vdev_op_io_done = vdev_anyraid_io_done, + .vdev_op_state_change = vdev_anyraid_state_change, + .vdev_op_need_resilver = vdev_anyraid_need_resilver, + .vdev_op_hold = NULL, + .vdev_op_rele = NULL, + .vdev_op_remap = NULL, + .vdev_op_xlate = vdev_anyraid_xlate, + .vdev_op_rebuild_asize = NULL, // TODO do we want to support rebuilds? 
+ .vdev_op_metaslab_init = NULL, + .vdev_op_config_generate = vdev_anyraid_config_generate, + .vdev_op_nparity = vdev_anyraid_nparity, + .vdev_op_ndisks = vdev_anyraid_ndisks, + .vdev_op_metaslab_size = vdev_anyraid_metaslab_size, + .vdev_op_type = VDEV_TYPE_ANYRAID, /* name of this vdev type */ + .vdev_op_leaf = B_FALSE /* not a leaf vdev */ +}; + + +ZFS_MODULE_PARAM(zfs_anyraid, zfs_anyraid_, min_tile_size, U64, ZMOD_RW, + "Minimum tile size for anyraid"); \ No newline at end of file diff --git a/module/zfs/vdev_label.c b/module/zfs/vdev_label.c index bb2a6baff863..c3c1d767c321 100644 --- a/module/zfs/vdev_label.c +++ b/module/zfs/vdev_label.c @@ -145,6 +145,7 @@ #include #include #include +#include #include #include #include @@ -1869,7 +1870,10 @@ vdev_extra_sync(zio_t *zio, uint64_t *good_writes, vdev_t *vd, int flags, if (!vdev_writeable(vd)) return; - // TODO Invoke extra sync logic for anyraid + if (vd->vdev_parent->vdev_ops == &vdev_anyraid_ops) { + vdev_anyraid_write_map_sync(vd, zio, txg, good_writes, flags, + status); + } } /* Sync the extra data of all vdevs in svd[] */ @@ -1884,7 +1888,10 @@ vdev_extra_sync_list(vdev_t **svd, int svdcount, int flags, uint64_t txg, boolean_t have_extra = B_FALSE; for (int i = 0; i < svdcount; i++) { - // TODO use this for anyraid + if (svd[i]->vdev_ops == &vdev_anyraid_ops) { + have_extra = B_TRUE; + break; + } } if (!have_extra) return (0); diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 74373f759cec..276c8663f983 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -4260,6 +4260,8 @@ zio_dva_allocate(zio_t *zio) flags |= METASLAB_GANG_CHILD; if (zio->io_priority == ZIO_PRIORITY_ASYNC_WRITE) flags |= METASLAB_ASYNC_ALLOC; + if (zio->io_flags & ZIO_FLAG_ZILWRITE) + flags |= METASLAB_ZIL; /* * If not already chosen, choose an appropriate allocation class. 
@@ -5137,7 +5139,9 @@ zio_checksum_generate(zio_t *zio) if (checksum == ZIO_CHECKSUM_OFF) return (zio); - ASSERT(checksum == ZIO_CHECKSUM_LABEL); + ASSERTF(checksum == ZIO_CHECKSUM_LABEL || + checksum == ZIO_CHECKSUM_ANYRAID_MAP, + "checksum not label: %px %d", zio, checksum); } else { if (BP_IS_GANG(bp) && zio->io_child_type == ZIO_CHILD_GANG) { ASSERT(!IO_IS_ALLOCATING(zio)); @@ -5169,7 +5173,10 @@ zio_checksum_verify(zio_t *zio) if (zio->io_prop.zp_checksum == ZIO_CHECKSUM_OFF) return (zio); - ASSERT3U(zio->io_prop.zp_checksum, ==, ZIO_CHECKSUM_LABEL); + ASSERTF(zio->io_prop.zp_checksum == ZIO_CHECKSUM_LABEL || + zio->io_prop.zp_checksum == ZIO_CHECKSUM_ANYRAID_MAP, + "checksum not label: %px %d", zio, + zio->io_prop.zp_checksum); } ASSERT0(zio->io_post & ZIO_POST_DIO_CHKSUM_ERR); diff --git a/module/zfs/zio_checksum.c b/module/zfs/zio_checksum.c index 1d0646a61185..1585744651af 100644 --- a/module/zfs/zio_checksum.c +++ b/module/zfs/zio_checksum.c @@ -206,6 +206,8 @@ zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = { abd_checksum_blake3_tmpl_init, abd_checksum_blake3_tmpl_free, ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_DEDUP | ZCHECKSUM_FLAG_SALTED | ZCHECKSUM_FLAG_NOPWRITE, "blake3"}, + {{abd_checksum_sha256, abd_checksum_sha256}, + NULL, NULL, ZCHECKSUM_FLAG_METADATA, "anyraid_map"}, }; /* @@ -408,6 +410,12 @@ zio_checksum_compute(zio_t *zio, enum zio_checksum checksum, abd_copy_from_buf_off(abd, &cksum, eck_offset + offsetof(zio_eck_t, zec_cksum), sizeof (zio_cksum_t)); + } else if (checksum == ZIO_CHECKSUM_ANYRAID_MAP) { + zio_eck_t *eck = (zio_eck_t *)(zio->io_private); + ci->ci_func[0](abd, size, spa->spa_cksum_tmpls[checksum], + &cksum); + eck->zec_cksum = cksum; + memcpy(&eck->zec_magic, &zec_magic, sizeof (zec_magic)); } else { saved = bp->blk_cksum; ci->ci_func[0](abd, size, spa->spa_cksum_tmpls[checksum], @@ -419,13 +427,14 @@ zio_checksum_compute(zio_t *zio, enum zio_checksum checksum, } int -zio_checksum_error_impl(spa_t *spa, const blkptr_t *bp, - enum zio_checksum checksum, abd_t *abd, uint64_t size, uint64_t offset, - zio_bad_cksum_t *info) +zio_checksum_error_impl(zio_t *zio, enum zio_checksum checksum, abd_t *abd, + uint64_t size, uint64_t offset, zio_bad_cksum_t *info) { zio_checksum_info_t *ci = &zio_checksum_table[checksum]; zio_cksum_t actual_cksum, expected_cksum; zio_eck_t eck; + spa_t *spa = zio->io_spa; + const blkptr_t *bp = zio->io_bp; int byteswap; if (checksum >= ZIO_CHECKSUM_FUNCTIONS || ci->ci_func[0] == NULL) @@ -433,8 +442,8 @@ zio_checksum_error_impl(spa_t *spa, const blkptr_t *bp, zio_checksum_template_init(checksum, spa); - IMPLY(bp == NULL, ci->ci_flags & ZCHECKSUM_FLAG_EMBEDDED); - IMPLY(bp == NULL, checksum == ZIO_CHECKSUM_LABEL); + IMPLY(bp == NULL, checksum == ZIO_CHECKSUM_LABEL || + checksum == ZIO_CHECKSUM_ANYRAID_MAP); if (ci->ci_flags & ZCHECKSUM_FLAG_EMBEDDED) { zio_cksum_t verifier; @@ -498,6 +507,12 @@ zio_checksum_error_impl(spa_t *spa, const blkptr_t *bp, byteswap_uint64_array(&expected_cksum, sizeof (zio_cksum_t)); } + } else if (checksum == ZIO_CHECKSUM_ANYRAID_MAP) { + eck = *(zio_eck_t *)(zio->io_private); + byteswap = (eck.zec_magic == BSWAP_64(ZEC_MAGIC)); + expected_cksum = eck.zec_cksum; + ci->ci_func[byteswap](abd, size, + spa->spa_cksum_tmpls[checksum], &actual_cksum); } else { byteswap = BP_SHOULD_BYTESWAP(bp); expected_cksum = bp->blk_cksum; @@ -548,24 +563,24 @@ zio_checksum_error(zio_t *zio, zio_bad_cksum_t *info) uint64_t size = bp ? 
BP_GET_PSIZE(bp) : zio->io_size; uint64_t offset = zio->io_offset; abd_t *data = zio->io_abd; - spa_t *spa = zio->io_spa; if (bp && BP_IS_GANG(bp)) { - if (spa_feature_is_active(spa, SPA_FEATURE_DYNAMIC_GANG_HEADER)) + if (spa_feature_is_active(zio->io_spa, + SPA_FEATURE_DYNAMIC_GANG_HEADER)) size = zio->io_size; else size = SPA_OLD_GANGBLOCKSIZE; } - error = zio_checksum_error_impl(spa, bp, checksum, data, size, - offset, info); + error = zio_checksum_error_impl(zio, checksum, data, size, offset, + info); if (error && bp && BP_IS_GANG(bp) && size > SPA_OLD_GANGBLOCKSIZE) { /* * It's possible that this is an old gang block. Rerun * the checksum with the old size; if that passes, then * update the gangblocksize appropriately. */ - error = zio_checksum_error_impl(spa, bp, checksum, data, + error = zio_checksum_error_impl(zio, checksum, data, SPA_OLD_GANGBLOCKSIZE, offset, info); if (error == 0) { ASSERT3U(zio->io_child_type, ==, ZIO_CHILD_VDEV); diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run index a69c6e3c8dd7..c9c36b9e15b5 100644 --- a/tests/runfiles/common.run +++ b/tests/runfiles/common.run @@ -40,6 +40,14 @@ tests = ['alloc_class_001_pos', 'alloc_class_002_neg', 'alloc_class_003_pos', 'alloc_class_013_pos', 'alloc_class_016_pos'] tags = ['functional', 'alloc_class'] +[tests/functional/anyraid] +tests = [ 'anyraid_clean_mirror_001_pos', 'anyraid_clean_mirror_002_pos', + 'anyraid_clean_mirror_003_pos', 'anyraid_tile_layout', 'anyraid_checkpoint', + 'anyraid_faildisk_write_replace_resilver', + 'anyraid_offline_write_online_resilver', + 'anyraid_special_vdev_001_pos', 'anyraid_special_vdev_002_pos'] +tags = ['functional', 'anyraid'] + [tests/functional/append] tests = ['file_append', 'threadsappend_001_pos'] tags = ['functional', 'append'] @@ -401,7 +409,8 @@ tests = ['zpool_add_001_pos', 'zpool_add_002_pos', 'zpool_add_003_pos', tags = ['functional', 'cli_root', 'zpool_add'] [tests/functional/cli_root/zpool_attach] -tests = ['zpool_attach_001_neg', 'attach-o_ashift'] +tests = ['zpool_attach_001_neg', 'zpool_attach_002_pos', 'zpool_attach_003_pos', + 'attach-o_ashift'] tags = ['functional', 'cli_root', 'zpool_attach'] [tests/functional/cli_root/zpool_clear] @@ -418,6 +427,8 @@ tests = ['zpool_create_001_pos', 'zpool_create_002_pos', 'zpool_create_017_neg', 'zpool_create_018_pos', 'zpool_create_019_pos', 'zpool_create_020_pos', 'zpool_create_021_pos', 'zpool_create_022_pos', 'zpool_create_023_neg', 'zpool_create_024_pos', + 'zpool_create_anyraid_001_pos', 'zpool_create_anyraid_002_pos', + 'zpool_create_anyraid_003_pos', 'zpool_create_encrypted', 'zpool_create_crypt_combos', 'zpool_create_draid_001_pos', 'zpool_create_draid_002_pos', 'zpool_create_draid_003_pos', 'zpool_create_draid_004_pos', @@ -449,6 +460,7 @@ tags = ['functional', 'cli_root', 'zpool_events'] [tests/functional/cli_root/zpool_export] tests = ['zpool_export_001_pos', 'zpool_export_002_pos', 'zpool_export_003_neg', 'zpool_export_004_pos', + 'zpool_export_anyraid_001_pos', 'zpool_export_parallel_pos', 'zpool_export_parallel_admin'] tags = ['functional', 'cli_root', 'zpool_export'] @@ -504,6 +516,7 @@ tags = ['functional', 'cli_root', 'zpool_labelclear'] [tests/functional/cli_root/zpool_initialize] tests = ['zpool_initialize_attach_detach_add_remove', + 'zpool_initialize_anyraid_attach', 'zpool_initialize_fault_export_import_online', 'zpool_initialize_import_export', 'zpool_initialize_multiple_pools', @@ -517,7 +530,6 @@ tests = ['zpool_initialize_attach_detach_add_remove', 
    'zpool_initialize_unsupported_vdevs',
    'zpool_initialize_verify_checksums',
    'zpool_initialize_verify_initialized']
-pre =
 tags = ['functional', 'cli_root', 'zpool_initialize']
 
 [tests/functional/cli_root/zpool_offline]
diff --git a/tests/runfiles/sanity.run b/tests/runfiles/sanity.run
index 249b415029c4..16a55a537712 100644
--- a/tests/runfiles/sanity.run
+++ b/tests/runfiles/sanity.run
@@ -255,7 +255,7 @@ tests = ['zpool_add_002_pos', 'zpool_add_003_pos',
 tags = ['functional', 'cli_root', 'zpool_add']
 
 [tests/functional/cli_root/zpool_attach]
-tests = ['zpool_attach_001_neg']
+tests = ['zpool_attach_001_neg', 'zpool_attach_002_pos']
 tags = ['functional', 'cli_root', 'zpool_attach']
 
 [tests/functional/cli_root/zpool_clear]
@@ -318,7 +318,6 @@ tags = ['functional', 'cli_root', 'zpool_labelclear']
 
 [tests/functional/cli_root/zpool_initialize]
 tests = ['zpool_initialize_online_offline']
-pre =
 tags = ['functional', 'cli_root', 'zpool_initialize']
 
 [tests/functional/cli_root/zpool_offline]
diff --git a/tests/zfs-tests/include/default.cfg.in b/tests/zfs-tests/include/default.cfg.in
index 4e009acaff91..5b0bb04fd229 100644
--- a/tests/zfs-tests/include/default.cfg.in
+++ b/tests/zfs-tests/include/default.cfg.in
@@ -140,6 +140,10 @@ export MAX_FINDDISKSNUM=6
 # Default minimum size for file based vdevs in the test suite
 export MINVDEVSIZE=$((256 * 1024 * 1024))
 
+# AnyRAID has higher requirements by design,
+# since it depends on the minimum tile size
+export MINVDEVSIZE2=$((24 * 1024 * 1024 * 1024))
+
 # Minimum vdev size possible as defined in the OS
 export SPA_MINDEVSIZE=$((64 * 1024 * 1024))
 
diff --git a/tests/zfs-tests/include/libtest.shlib b/tests/zfs-tests/include/libtest.shlib
index 8b30b9b91641..9feef066fe91 100644
--- a/tests/zfs-tests/include/libtest.shlib
+++ b/tests/zfs-tests/include/libtest.shlib
@@ -1856,7 +1856,7 @@ function verify_pool
 function get_disklist # pool
 {
 	echo $(zpool iostat -v $1 | awk '(NR > 4) {print $1}' | \
-	    grep -vEe '^-----' -e "^(mirror|raidz[1-3]|draid[1-3]|spare|log|cache|special|dedup)|\-[0-9]$")
+	    grep -vEe '^-----' -e "^(mirror|raidz[1-3]|anyraid|draid[1-3]|spare|log|cache|special|dedup)|\-[0-9]$")
 }
 
 #
@@ -2218,6 +2218,30 @@ BEGIN { FS="."; }
 	echo $unused
 }
 
+function create_sparse_files
+{
+	typeset prefix=$1
+	typeset -i count=$2
+	typeset size=$3
+	typeset -i i
+
+	log_must mkdir -p $TESTDIR/sparse_files
+
+	typeset sfiles=""
+	for (( i=0; i<count; i++ )); do
+		typeset sfile=$TESTDIR/sparse_files/$prefix.$i
+		log_must truncate -s $size $sfile
+		eval "export ${prefix}${i}=$sfile"
+		sfiles="$sfiles $sfile"
+	done
+	eval "export ${prefix}s=\"${sfiles# }\""
+}
+
+function delete_sparse_files
+{
+	log_must rm -rf $TESTDIR/sparse_files
+}
+
 function add_group #<group_name>
 {
 	typeset group=$1
diff --git a/tests/zfs-tests/include/tunables.cfg b/tests/zfs-tests/include/tunables.cfg
index 54b50c9dba77..38dd44e1fc6a 100644
--- a/tests/zfs-tests/include/tunables.cfg
+++ b/tests/zfs-tests/include/tunables.cfg
@@ -18,6 +18,7 @@ UNAME=$(uname)
 cat <<%%%% |
 ADMIN_SNAPSHOT UNSUPPORTED zfs_admin_snapshot
 ALLOW_REDACTED_DATASET_MOUNT allow_redacted_dataset_mount zfs_allow_redacted_dataset_mount
+ANYRAID_MIN_TILE_SIZE anyraid.min_tile_size zfs_anyraid_min_tile_size
 ARC_MAX arc.max zfs_arc_max
 ARC_MIN arc.min zfs_arc_min
 ASYNC_BLOCK_MAX_BLOCKS async_block_max_blocks zfs_async_block_max_blocks
diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am
index 23284234cdf7..c50c5c51677f 100644
--- a/tests/zfs-tests/tests/Makefile.am
+++ b/tests/zfs-tests/tests/Makefile.am
@@ -89,6 +89,8 @@ nobase_dist_datadir_zfs_tests_tests_DATA += \
 	functional/acl/acl_common.kshlib \
 	functional/alloc_class/alloc_class.cfg \
 	functional/alloc_class/alloc_class.kshlib \
+	functional/anyraid/anyraid_common.kshlib \
+	functional/anyraid/default.cfg \
 	functional/atime/atime.cfg \
 	functional/atime/atime_common.kshlib \
functional/bclone/bclone.cfg \ @@ -434,6 +436,17 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/alloc_class/alloc_class_016_pos.ksh \ functional/alloc_class/cleanup.ksh \ functional/alloc_class/setup.ksh \ + functional/anyraid/anyraid_checkpoint.ksh \ + functional/anyraid/anyraid_clean_mirror_001_pos.ksh \ + functional/anyraid/anyraid_clean_mirror_002_pos.ksh \ + functional/anyraid/anyraid_clean_mirror_003_pos.ksh \ + functional/anyraid/anyraid_faildisk_write_replace_resilver.ksh \ + functional/anyraid/anyraid_offline_write_online_resilver.ksh \ + functional/anyraid/anyraid_tile_layout.ksh \ + functional/anyraid/anyraid_special_vdev_001_pos.ksh \ + functional/anyraid/anyraid_special_vdev_002_pos.ksh \ + functional/anyraid/cleanup.ksh \ + functional/anyraid/setup.ksh \ functional/append/file_append.ksh \ functional/append/threadsappend_001_pos.ksh \ functional/append/cleanup.ksh \ @@ -1037,6 +1050,8 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/cli_root/zpool_attach/cleanup.ksh \ functional/cli_root/zpool_attach/setup.ksh \ functional/cli_root/zpool_attach/zpool_attach_001_neg.ksh \ + functional/cli_root/zpool_attach/zpool_attach_002_pos.ksh \ + functional/cli_root/zpool_attach/zpool_attach_003_pos.ksh \ functional/cli_root/zpool/cleanup.ksh \ functional/cli_root/zpool_clear/cleanup.ksh \ functional/cli_root/zpool_clear/setup.ksh \ @@ -1070,6 +1085,9 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/cli_root/zpool_create/zpool_create_022_pos.ksh \ functional/cli_root/zpool_create/zpool_create_023_neg.ksh \ functional/cli_root/zpool_create/zpool_create_024_pos.ksh \ + functional/cli_root/zpool_create/zpool_create_anyraid_001_pos.ksh \ + functional/cli_root/zpool_create/zpool_create_anyraid_002_pos.ksh \ + functional/cli_root/zpool_create/zpool_create_anyraid_003_pos.ksh \ functional/cli_root/zpool_create/zpool_create_crypt_combos.ksh \ functional/cli_root/zpool_create/zpool_create_draid_001_pos.ksh \ functional/cli_root/zpool_create/zpool_create_draid_002_pos.ksh \ @@ -1116,6 +1134,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/cli_root/zpool_export/zpool_export_002_pos.ksh \ functional/cli_root/zpool_export/zpool_export_003_neg.ksh \ functional/cli_root/zpool_export/zpool_export_004_pos.ksh \ + functional/cli_root/zpool_export/zpool_export_anyraid_001_pos.ksh \ functional/cli_root/zpool_export/zpool_export_parallel_admin.ksh \ functional/cli_root/zpool_export/zpool_export_parallel_pos.ksh \ functional/cli_root/zpool_get/cleanup.ksh \ @@ -1187,7 +1206,9 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/cli_root/zpool_iostat/zpool_iostat_interval_all.ksh \ functional/cli_root/zpool_iostat/zpool_iostat_interval_some.ksh \ functional/cli_root/zpool_initialize/cleanup.ksh \ + functional/cli_root/zpool_initialize/setup.ksh \ functional/cli_root/zpool_initialize/zpool_initialize_attach_detach_add_remove.ksh \ + functional/cli_root/zpool_initialize/zpool_initialize_anyraid_attach.ksh \ functional/cli_root/zpool_initialize/zpool_initialize_fault_export_import_online.ksh \ functional/cli_root/zpool_initialize/zpool_initialize_import_export.ksh \ functional/cli_root/zpool_initialize/zpool_initialize_multiple_pools.ksh \ diff --git a/tests/zfs-tests/tests/functional/anyraid/anyraid_checkpoint.ksh b/tests/zfs-tests/tests/functional/anyraid/anyraid_checkpoint.ksh new file mode 100755 index 000000000000..26876894b004 --- /dev/null +++ b/tests/zfs-tests/tests/functional/anyraid/anyraid_checkpoint.ksh @@ -0,0 +1,64 @@ +#!/bin/ksh 
-p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2025 Klara, Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# Anyraid works correctly with checkpoints
+#
+# STRATEGY:
+# 1. Create an anyraid vdev
+# 2. Take a checkpoint
+# 3. Allocate more space
+# 4. Roll back to the checkpoint
+# 5. Verify that the tile map looks like what it did originally
+#
+
+verify_runnable "global"
+
+cleanup() {
+	poolexists $TESTPOOL && destroy_pool $TESTPOOL
+}
+
+log_onexit cleanup
+
+log_assert "Anyraid works correctly with checkpoints"
+
+log_must create_pool $TESTPOOL anyraid1 $DISKS
+
+map=$(zdb --anyraid-map $TESTPOOL)
+log_must zpool checkpoint $TESTPOOL
+
+log_must dd if=/dev/urandom of=/$TESTPOOL/f1 bs=1M count=2k
+
+log_must zpool export $TESTPOOL
+log_must zpool import --rewind-to-checkpoint $TESTPOOL
+map2=$(zdb --anyraid-map $TESTPOOL)
+log_must test "$map" == "$map2"
+
+log_pass "Anyraid works correctly with checkpoints"
diff --git a/tests/zfs-tests/tests/functional/anyraid/anyraid_clean_mirror_001_pos.ksh b/tests/zfs-tests/tests/functional/anyraid/anyraid_clean_mirror_001_pos.ksh
new file mode 100755
index 000000000000..ce16d6db482d
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/anyraid/anyraid_clean_mirror_001_pos.ksh
@@ -0,0 +1,55 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2025, Klara, Inc.
+#
+
+. $STF_SUITE/tests/functional/anyraid/anyraid_common.kshlib
+
+#
+# DESCRIPTION:
+# AnyRAID mirror1 can survive having 1 failed disk.
+#
+# STRATEGY:
+# 1. Write several files to the ZFS filesystem mirror.
+# 2. Overwrite one of the disks of the mirror with zeroes.
+# 3. Verify that all the file contents are unchanged on the file system.
+#
+
+verify_runnable "global"
+
+log_assert "AnyRAID mirror1 can survive having 1 failed disk"
+
+log_must create_sparse_files "disk" 3 $DEVSIZE
+
+clean_mirror_spec_cases "anyraid1 $disk0 $disk1" \
+	"$disk0" \
+	"$disk1"
+
+clean_mirror_spec_cases "anyraid1 $disk0 $disk1 $disk2" \
+	"$disk0" \
+	"$disk1" \
+	"$disk2"
+
+log_pass "AnyRAID mirror1 can survive having 1 failed disk"
diff --git a/tests/zfs-tests/tests/functional/anyraid/anyraid_clean_mirror_002_pos.ksh b/tests/zfs-tests/tests/functional/anyraid/anyraid_clean_mirror_002_pos.ksh
new file mode 100755
index 000000000000..60f94cb99af2
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/anyraid/anyraid_clean_mirror_002_pos.ksh
@@ -0,0 +1,66 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2025, Klara, Inc.
+#
+
+. $STF_SUITE/tests/functional/anyraid/anyraid_common.kshlib
+
+#
+# DESCRIPTION:
+# AnyRAID mirror2 can survive having 1-2 failed disks.
+#
+# STRATEGY:
+# 1. Write several files to the ZFS filesystem mirror.
+# 2. Overwrite the selected disks of the mirror with zeroes.
+# 3. Verify that all the file contents are unchanged on the file system.
+#
+
+verify_runnable "global"
+
+log_assert "AnyRAID mirror2 can survive having 1-2 failed disks"
+
+log_must create_sparse_files "disk" 4 $DEVSIZE
+
+clean_mirror_spec_cases "anyraid2 $disk0 $disk1 $disk2" \
+	"$disk0" \
+	"$disk1" \
+	"$disk2" \
+	"\"$disk0 $disk1\"" \
+	"\"$disk0 $disk2\"" \
+	"\"$disk1 $disk2\""
+
+clean_mirror_spec_cases "anyraid2 $disk0 $disk1 $disk2 $disk3" \
+	"$disk0" \
+	"$disk1" \
+	"$disk2" \
+	"$disk3" \
+	"\"$disk0 $disk1\"" \
+	"\"$disk0 $disk2\"" \
+	"\"$disk0 $disk3\"" \
+	"\"$disk1 $disk2\"" \
+	"\"$disk1 $disk3\"" \
+	"\"$disk2 $disk3\""
+
+log_pass "AnyRAID mirror2 can survive having 1-2 failed disks"
diff --git a/tests/zfs-tests/tests/functional/anyraid/anyraid_clean_mirror_003_pos.ksh b/tests/zfs-tests/tests/functional/anyraid/anyraid_clean_mirror_003_pos.ksh
new file mode 100755
index 000000000000..85393052d861
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/anyraid/anyraid_clean_mirror_003_pos.ksh
@@ -0,0 +1,62 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2025, Klara, Inc.
+#
+
+. $STF_SUITE/tests/functional/anyraid/anyraid_common.kshlib
+
+#
+# DESCRIPTION:
+# AnyRAID mirror3 can survive having 1-3 failed disks.
+#
+# STRATEGY:
+# 1. Write several files to the ZFS filesystem mirror.
+# 2. Overwrite the selected disks of the mirror with zeroes.
+# 3. Verify that all the file contents are unchanged on the file system.
+#
+
+verify_runnable "global"
+
+log_assert "AnyRAID mirror3 can survive having 1-3 failed disks"
+
+log_must create_sparse_files "disk" 4 $DEVSIZE
+
+clean_mirror_spec_cases "anyraid3 $disk0 $disk1 $disk2 $disk3" \
+	"$disk0" \
+	"$disk1" \
+	"$disk2" \
+	"$disk3" \
+	"\"$disk0 $disk1\"" \
+	"\"$disk0 $disk2\"" \
+	"\"$disk0 $disk3\"" \
+	"\"$disk1 $disk2\"" \
+	"\"$disk1 $disk3\"" \
+	"\"$disk2 $disk3\"" \
+	"\"$disk0 $disk1 $disk2\"" \
+	"\"$disk0 $disk1 $disk3\"" \
+	"\"$disk0 $disk2 $disk3\"" \
+	"\"$disk1 $disk2 $disk3\""
+
+log_pass "AnyRAID mirror3 can survive having 1-3 failed disks"
diff --git a/tests/zfs-tests/tests/functional/anyraid/anyraid_common.kshlib b/tests/zfs-tests/tests/functional/anyraid/anyraid_common.kshlib
new file mode 100644
index 000000000000..47d004d55a94
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/anyraid/anyraid_common.kshlib
@@ -0,0 +1,99 @@
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2025, Klara, Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/anyraid/default.cfg
+
+function wipe_some_disks_and_verify_content_is_still_okay
+{
+	typeset pool=$1
+	shift
+
+	typeset atfile=0
+	set -A files
+	set -A cksums
+	typeset newcksum
+
+	while (( atfile < FILE_COUNT )); do
+		files[$atfile]=/$pool/file.$atfile
+		log_must file_write -o create -f ${files[$atfile]} \
+		    -b $FILE_SIZE -c 1
+		cksums[$atfile]=$(xxh128digest ${files[$atfile]})
+		(( atfile = atfile + 1 ))
+	done
+
+	for disk in "$@"; do
+		log_must dd if=/dev/zero of=$disk seek=8 bs=$DD_BLOCK \
+		    count=$(( DD_COUNT - 128 )) conv=notrunc
+	done
+
+	#
+	# Flush out the cache so that we ensure we're reading from disk.
+	#
+	log_must zpool status
+	log_must zpool export $pool
+	log_must zpool import -d $(dirname $1)
+	log_must zpool import -d $(dirname $1) $pool
+
+	atfile=0
+	typeset -i failedcount=0
+	while (( atfile < FILE_COUNT )); do
+		newcksum=$(xxh128digest ${files[$atfile]})
+		if [[ $newcksum != ${cksums[$atfile]} ]]; then
+			(( failedcount = failedcount + 1 ))
+			log_note "Wrong checksum of ${files[$atfile]}"
+		fi
+		(( atfile = atfile + 1 ))
+	done
+
+	if (( failedcount > 0 )); then
+		log_fail "$failedcount of the $FILE_COUNT files did not" \
+		    "have the same checksum before and after"
+	fi
+
+	log_must zpool status
+	log_must zpool scrub $pool
+	log_must wait_scrubbed $pool
+	log_must zpool status
+}
+
+function clean_mirror_spec_cases
+{
+	typeset poolspec=$1
+	shift
+
+	typeset tcases
+	eval "typeset -a tcases=($*)"
+
+	log_note "pool specification: $poolspec"
+
+	for tcase in "${tcases[@]}"; do
+		log_note "failed disk case: $tcase"
+		log_must zpool create -f $TESTPOOL $poolspec
+		wipe_some_disks_and_verify_content_is_still_okay $TESTPOOL $tcase
+		poolexists $TESTPOOL && destroy_pool $TESTPOOL
+	done
+}
diff --git a/tests/zfs-tests/tests/functional/anyraid/anyraid_faildisk_write_replace_resilver.ksh b/tests/zfs-tests/tests/functional/anyraid/anyraid_faildisk_write_replace_resilver.ksh
new file mode 100755
index 000000000000..5227d480e14a
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/anyraid/anyraid_faildisk_write_replace_resilver.ksh
@@ -0,0 +1,90 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2025, Klara, Inc.
+#
+
+. $STF_SUITE/tests/functional/anyraid/anyraid_common.kshlib
+
+#
+# DESCRIPTION:
+# AnyRAID mirror can resilver a replaced disk.
+#
+# STRATEGY:
+# 1. Fail one disk.
+# 2. Write new data to the pool.
+# 3. Get that disk replaced and resilvered.
+# 4. Repeat to verify sequential resilvering.
+# + +verify_runnable "global" + +log_assert "AnyRAID mirror can resilver a replaced disk" + +cleanup() { + poolexists $TESTPOOL && destroy_pool $TESTPOOL +} + +log_onexit cleanup + + +# anyraid1 + +for replace_flags in '' '-s'; do + + log_must create_sparse_files "disk" 3 $DEVSIZE + log_must create_sparse_files "spare" 1 $DEVSIZE + log_must zpool create -f $TESTPOOL anyraid1 $disks + log_must zfs set primarycache=none $TESTPOOL + + # Write initial data + log_must dd if=/dev/urandom of=/$TESTPOOL/file1.bin bs=1M count=$(( DEVSIZE / 2 / 1024 / 1024 )) + + # Fail one disk + log_must truncate -s0 $disk0 + + # Read initial data, write new data + dd if=/$TESTPOOL/file1.bin of=/dev/null bs=1M count=$(( DEVSIZE / 2 / 1024 / 1024 )) + log_must dd if=/dev/urandom of=/$TESTPOOL/file1.bin bs=1M count=$(( DEVSIZE / 2 / 1024 / 1024 )) + + # Check that disk is faulted + zpool status + log_must check_state $TESTPOOL $disk0 "faulted" + + # Initiate disk replacement + log_must zpool replace -f $replace_flags $TESTPOOL $disk0 $spare0 + + # Wait until resilvering is done and the pool is back online + for i in {1..60}; do + check_state $TESTPOOL "" "online" && break + sleep 1 + done + zpool status + log_must check_state $TESTPOOL "" "online" + + destroy_pool $TESTPOOL + +done + +log_pass "AnyRAID mirror can resilver a replaced disk" diff --git a/tests/zfs-tests/tests/functional/anyraid/anyraid_offline_write_online_resilver.ksh b/tests/zfs-tests/tests/functional/anyraid/anyraid_offline_write_online_resilver.ksh new file mode 100755 index 000000000000..f19115b12e43 --- /dev/null +++ b/tests/zfs-tests/tests/functional/anyraid/anyraid_offline_write_online_resilver.ksh @@ -0,0 +1,128 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2025, Klara, Inc. +# + +. $STF_SUITE/tests/functional/anyraid/anyraid_common.kshlib + +# +# DESCRIPTION: +# AnyRAID mirror can resilver a disk after it gets back online. +# +# STRATEGY: +# 1. Offline one disk. +# 2. Write to the pool. +# 3. Get that disk back online. +# 4. Get it resilvered. 
+# + +verify_runnable "global" + +log_assert "AnyRAID mirror can resilver a disk after it gets back online" + +cleanup() { + poolexists $TESTPOOL && destroy_pool $TESTPOOL +} + +log_onexit cleanup + +# anyraid1 + +log_must create_sparse_files "disk" 3 $DEVSIZE +log_must zpool create -f $TESTPOOL anyraid1 $disks + +log_must zpool offline $TESTPOOL $disk0 +log_must check_state $TESTPOOL $disk0 "offline" +log_must check_state $TESTPOOL "" "degraded" + +log_must dd if=/dev/urandom of=/$TESTPOOL/file.bin bs=1M count=128 +log_must zpool online $TESTPOOL $disk0 +log_must check_state $TESTPOOL $disk0 "online" +for i in {1..60}; do + check_state $TESTPOOL "" "online" && break + sleep 1 +done +zpool status +log_must check_state $TESTPOOL "" "online" + +log_must destroy_pool $TESTPOOL + + +# anyraid2 + +log_must create_sparse_files "disk" 5 $DEVSIZE +log_must zpool create -f $TESTPOOL anyraid2 $disks + +log_must zpool offline $TESTPOOL $disk0 +log_must zpool offline $TESTPOOL $disk1 +log_must check_state $TESTPOOL $disk0 "offline" +log_must check_state $TESTPOOL $disk1 "offline" +log_must check_state $TESTPOOL "" "degraded" + +log_must dd if=/dev/urandom of=/$TESTPOOL/file.bin bs=1M count=128 +log_must zpool online $TESTPOOL $disk0 +log_must zpool online $TESTPOOL $disk1 +log_must check_state $TESTPOOL $disk0 "online" +log_must check_state $TESTPOOL $disk1 "online" +for i in {1..60}; do + check_state $TESTPOOL "" "online" && break + sleep 1 +done +zpool status +log_must check_state $TESTPOOL "" "online" + +log_must destroy_pool $TESTPOOL + + +# anyraid3 + +log_must create_sparse_files "disk" 7 $DEVSIZE +log_must zpool create -f $TESTPOOL anyraid3 $disks + +log_must zpool offline $TESTPOOL $disk0 +log_must zpool offline $TESTPOOL $disk1 +log_must zpool offline $TESTPOOL $disk2 +log_must check_state $TESTPOOL $disk0 "offline" +log_must check_state $TESTPOOL $disk1 "offline" +log_must check_state $TESTPOOL $disk2 "offline" +log_must check_state $TESTPOOL "" "degraded" + +log_must dd if=/dev/urandom of=/$TESTPOOL/file.bin bs=1M count=128 +log_must zpool online $TESTPOOL $disk0 +log_must zpool online $TESTPOOL $disk1 +log_must zpool online $TESTPOOL $disk2 +log_must check_state $TESTPOOL $disk0 "online" +log_must check_state $TESTPOOL $disk1 "online" +log_must check_state $TESTPOOL $disk2 "online" +for i in {1..60}; do + check_state $TESTPOOL "" "online" && break + sleep 1 +done +zpool status +log_must check_state $TESTPOOL "" "online" + +log_must destroy_pool $TESTPOOL + +log_pass "AnyRAID mirror can resilver a disk after it gets back online" diff --git a/tests/zfs-tests/tests/functional/anyraid/anyraid_special_vdev_001_pos.ksh b/tests/zfs-tests/tests/functional/anyraid/anyraid_special_vdev_001_pos.ksh new file mode 100755 index 000000000000..c316ea1039c8 --- /dev/null +++ b/tests/zfs-tests/tests/functional/anyraid/anyraid_special_vdev_001_pos.ksh @@ -0,0 +1,72 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2025, Klara, Inc. +# + +. $STF_SUITE/tests/functional/anyraid/anyraid_common.kshlib + +# +# DESCRIPTION: +# Verify a variety of AnyRAID pools with a special VDEV mirror. +# +# STRATEGY: +# 1. Create an AnyRAID pool with a special VDEV mirror. +# 2. Write to it, sync. +# 3. Export and re-import the pool. +# 4. Verify that all the file contents are unchanged on the file system. +# + +verify_runnable "global" + +function cleanup +{ + poolexists $TESTPOOL && destroy_pool $TESTPOOL +} +log_onexit cleanup + +log_assert "Verify a variety of AnyRAID pools with a special VDEV mirror" + +log_must create_sparse_files "disk" 4 $DEVSIZE +log_must create_sparse_files "sdisk" 2 $DEVSIZE + +typeset oldcksum +typeset newcksum +for parity in {0..3}; do + log_must zpool create -f $TESTPOOL anyraid$parity $disks special mirror $sdisks + log_must poolexists $TESTPOOL + + log_must dd if=/dev/urandom of=/$TESTPOOL/file.bin bs=1M count=128 + oldcksum=$(xxh128digest /$TESTPOOL/file.bin) + log_must zpool export $TESTPOOL + + log_must zpool import -d $(dirname $disk0) $TESTPOOL + newcksum=$(xxh128digest /$TESTPOOL/file.bin) + + log_must test "$oldcksum" = "$newcksum" + + log_must destroy_pool $TESTPOOL +done + +log_pass "Verify a variety of AnyRAID pools with a special VDEV mirror" diff --git a/tests/zfs-tests/tests/functional/anyraid/anyraid_special_vdev_002_pos.ksh b/tests/zfs-tests/tests/functional/anyraid/anyraid_special_vdev_002_pos.ksh new file mode 100755 index 000000000000..6adea12dbfcc --- /dev/null +++ b/tests/zfs-tests/tests/functional/anyraid/anyraid_special_vdev_002_pos.ksh @@ -0,0 +1,72 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2025, Klara, Inc. +# + +. $STF_SUITE/tests/functional/anyraid/anyraid_common.kshlib + +# +# DESCRIPTION: +# Verify a variety of AnyRAID pools with a special VDEV AnyRAID. +# +# STRATEGY: +# 1. Create an AnyRAID pool with a special VDEV AnyRAID. +# 2. Write to it, sync. +# 3. Export and re-import the pool. +# 4. Verify that all the file contents are unchanged on the file system. 
+# + +verify_runnable "global" + +function cleanup +{ + poolexists $TESTPOOL && destroy_pool $TESTPOOL +} +log_onexit cleanup + +log_assert "Verify a variety of AnyRAID pools with a special VDEV AnyRAID" + +log_must create_sparse_files "disk" 4 $DEVSIZE +log_must create_sparse_files "sdisk" 4 $DEVSIZE + +typeset oldcksum +typeset newcksum +for parity in {0..3}; do + log_must zpool create $TESTPOOL anyraid$parity $disks special anyraid$parity $sdisks + log_must poolexists $TESTPOOL + + log_must dd if=/dev/urandom of=/$TESTPOOL/file.bin bs=1M count=128 + oldcksum=$(xxh128digest /$TESTPOOL/file.bin) + log_must zpool export $TESTPOOL + + log_must zpool import -d $(dirname $disk0) $TESTPOOL + newcksum=$(xxh128digest /$TESTPOOL/file.bin) + + log_must test "$oldcksum" = "$newcksum" + + log_must destroy_pool $TESTPOOL +done + +log_pass "Verify a variety of AnyRAID pools with a special VDEV AnyRAID" diff --git a/tests/zfs-tests/tests/functional/anyraid/anyraid_tile_layout.ksh b/tests/zfs-tests/tests/functional/anyraid/anyraid_tile_layout.ksh new file mode 100755 index 000000000000..89f6679353a5 --- /dev/null +++ b/tests/zfs-tests/tests/functional/anyraid/anyraid_tile_layout.ksh @@ -0,0 +1,70 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2025 Klara, Inc. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# Anyraid disks intelligently select which tiles to use +# +# STRATEGY: +# 1. Create an anyraid1 vdev with 1 large disk and 2 small disks +# 2. Verify that the full space can be used +# + +verify_runnable "global" + +cleanup() { + zpool destroy $TESTPOOL2 + zpool destroy $TESTPOOL + set_tunable64 ANYRAID_MIN_TILE_SIZE 1073741824 +} + +log_onexit cleanup + +log_must create_pool $TESTPOOL $DISKS + +log_must truncate -s 512M /$TESTPOOL/vdev_file.{0,1,2} +log_must truncate -s 1G /$TESTPOOL/vdev_file.3 +set_tunable64 ANYRAID_MIN_TILE_SIZE 67108864 + +log_assert "Anyraid disks intelligently select which tiles to use" + +log_must create_pool $TESTPOOL2 anyraid1 /$TESTPOOL/vdev_file.{0,1,2,3} + +cap=$(zpool get -Hp -o value size $TESTPOOL2) +[[ "$cap" -eq $((9 * 64 * 1024 * 1024)) ]] || \ + log_fail "Incorrect space for anyraid vdev: $cap" + +# +# This should just about fill the pool, when you account for the 128MiB of +# reserved slop space. If the space isn't being selected intelligently, we +# would hit ENOSPC 64MiB early. 
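+#
+# The arithmetic, assuming the 64MiB tile size set above: the pool
+# reports 9 tiles (576MiB) of capacity, and subtracting the 128MiB of
+# slop leaves 448MiB, i.e. 7 tiles, so the 7 * 64 - 1 = 447MiB write
+# below should just barely fit.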
+# +log_must dd if=/dev/urandom of=/$TESTPOOL2/f1 bs=1M count=$((64 * 7 - 1)) + +log_pass "Anyraid disks intelligently select which tiles to use" diff --git a/tests/zfs-tests/tests/functional/anyraid/cleanup.ksh b/tests/zfs-tests/tests/functional/anyraid/cleanup.ksh new file mode 100755 index 000000000000..0e239571f23a --- /dev/null +++ b/tests/zfs-tests/tests/functional/anyraid/cleanup.ksh @@ -0,0 +1,34 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2025, Klara, Inc. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/anyraid/default.cfg + +poolexists $TESTPOOL && destroy_pool $TESTPOOL + +log_must delete_sparse_files +restore_tunable ANYRAID_MIN_TILE_SIZE diff --git a/tests/zfs-tests/tests/functional/anyraid/default.cfg b/tests/zfs-tests/tests/functional/anyraid/default.cfg new file mode 100644 index 000000000000..db3db19fb7aa --- /dev/null +++ b/tests/zfs-tests/tests/functional/anyraid/default.cfg @@ -0,0 +1,32 @@ +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2025, Klara, Inc. +# + +export DEVSIZE=4294967296 +export DD_BLOCK=$(( 64 * 1024 )) +export DD_COUNT=$(( DEVSIZE / DD_BLOCK )) + +export FILE_COUNT=10 +export FILE_SIZE=$(( 1024 * 1024 )) diff --git a/tests/zfs-tests/tests/functional/anyraid/setup.ksh b/tests/zfs-tests/tests/functional/anyraid/setup.ksh new file mode 100755 index 000000000000..3e923fdbb0ff --- /dev/null +++ b/tests/zfs-tests/tests/functional/anyraid/setup.ksh @@ -0,0 +1,36 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. 
+# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2025, Klara, Inc. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/anyraid/default.cfg + +verify_runnable "global" + +save_tunable ANYRAID_MIN_TILE_SIZE +set_tunable64 ANYRAID_MIN_TILE_SIZE 1073741824 + +log_pass diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount.kshlib b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount.kshlib index 08795a7ea257..7e447a2f7b81 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount.kshlib +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount.kshlib @@ -65,6 +65,7 @@ function setup_filesystem #disklist #pool #fs #mntpoint #type #vdev if [[ $vdev != "" && \ $vdev != "mirror" && \ $vdev != "raidz" && \ + $vdev != "anyraid" && \ $vdev != "draid" ]] ; then log_note "Wrong vdev: (\"$vdev\")" diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_add/cleanup.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_add/cleanup.ksh index 3c16a6f97f4a..4ffcd5cda088 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_add/cleanup.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_add/cleanup.ksh @@ -33,4 +33,6 @@ . $STF_SUITE/include/libtest.shlib . $STF_SUITE/tests/functional/cli_root/zpool_add/zpool_add.kshlib +delete_sparse_files + log_pass diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_001_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_001_pos.ksh index 82d19e850f28..bd4bce221568 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_001_pos.ksh @@ -49,32 +49,31 @@ verify_runnable "global" function cleanup { poolexists $TESTPOOL && destroy_pool $TESTPOOL - rm -f $disk0 $disk1 } log_assert "'zpool add ...' can add devices to the pool." 
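+
+# The anyraid keywords are exercised with a dedicated set of sparse file
+# vdevs below: pools are created from the "disk" files and then expanded
+# with the "extradisk" files.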
log_onexit cleanup -set -A keywords "" "mirror" "raidz" "raidz1" "draid:1s" "draid1:1s" "spare" +set -A keywords "" "mirror" "raidz" "raidz1" "anyraid" "anyraid1" "anyraid2" "anyraid3" "draid:1s" "draid1:1s" "spare" + +create_sparse_files "disk" 4 $MINVDEVSIZE2 +create_sparse_files "extradisk" 4 $MINVDEVSIZE2 pooldevs="${DISK0} \ \"${DISK0} ${DISK1}\" \ \"${DISK0} ${DISK1} ${DISK2}\"" mirrordevs="\"${DISK0} ${DISK1}\"" raidzdevs="\"${DISK0} ${DISK1}\"" +anyraiddevs="\"${extradisks}\"" draiddevs="\"${DISK0} ${DISK1} ${DISK2}\"" -disk0=$TEST_BASE_DIR/disk0 -disk1=$TEST_BASE_DIR/disk1 -disk2=$TEST_BASE_DIR/disk2 -truncate -s $MINVDEVSIZE $disk0 $disk1 $disk2 - typeset -i i=0 typeset vdev eval set -A poolarray $pooldevs eval set -A mirrorarray $mirrordevs eval set -A raidzarray $raidzdevs +eval set -A anyraidarray $anyraiddevs eval set -A draidarray $draiddevs while (( $i < ${#keywords[*]} )); do @@ -111,6 +110,16 @@ while (( $i < ${#keywords[*]} )); do destroy_pool "$TESTPOOL" done + ;; + anyraid*) + for vdev in "${anyraidarray[@]}"; do + create_pool "$TESTPOOL" "${keywords[i]}" $disks + log_must poolexists "$TESTPOOL" + log_must zpool add "$TESTPOOL" ${keywords[i]} $vdev + log_must vdevs_in_pool "$TESTPOOL" "$vdev" + destroy_pool "$TESTPOOL" + done + ;; draid:1s|draid1:1s) for vdev in "${draidarray[@]}"; do diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_009_neg.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_009_neg.ksh index 2e1590faf8f5..d43d9a0b3fa6 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_009_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_009_neg.ksh @@ -57,17 +57,19 @@ log_assert "'zpool add' should fail if vdevs are the same or vdev is " \ log_onexit cleanup -create_pool $TESTPOOL $DISK0 +create_sparse_files "disk" 2 $MINVDEVSIZE2 + +create_pool $TESTPOOL $disk0 log_must poolexists $TESTPOOL -log_mustnot zpool add -f $TESTPOOL $DISK0 +log_mustnot zpool add -f $TESTPOOL $disk0 -for type in "" "mirror" "raidz" "draid" "spare" "log" "dedup" "special" "cache" +for type in "" "mirror" "raidz" "anyraid" "draid" "spare" "log" "dedup" "special" "cache" do - log_mustnot zpool add -f $TESTPOOL $type $DISK0 $DISK1 - log_mustnot zpool add --allow-in-use $TESTPOOL $type $DISK0 $DISK1 - log_mustnot zpool add -f $TESTPOOL $type $DISK1 $DISK1 - log_mustnot zpool add --allow-in-use $TESTPOOL $type $DISK1 $DISK1 + log_mustnot zpool add -f $TESTPOOL $type $disk0 $disk1 + log_mustnot zpool add --allow-in-use $TESTPOOL $type $disk0 $disk1 + log_mustnot zpool add -f $TESTPOOL $type $disk1 $disk1 + log_mustnot zpool add --allow-in-use $TESTPOOL $type $disk1 $disk1 done log_pass "'zpool add' get fail as expected if vdevs are the same or vdev is " \ diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_attach/zpool_attach_002_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_attach/zpool_attach_002_pos.ksh new file mode 100755 index 000000000000..aae9a8605ff2 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_attach/zpool_attach_002_pos.ksh @@ -0,0 +1,70 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. 
+# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2025 Klara, Inc. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# 'zpool attach' works to expand mirrors and anyraid vdevs +# +# STRATEGY: +# 1. Create a normal striped pool +# 2. Verify that attaching creates a mirror +# 3. Verify that attaching again creates a wider mirror +# 4. Create an anyraid vdev +# 5. Verify that attaching expands the anyraid vdev +# + +verify_runnable "global" + +cleanup() { + log_must zpool destroy $TESTPOOL2 + restore_tunable ANYRAID_MIN_TILE_SIZE +} + +log_onexit cleanup + +log_must truncate -s 8G /$TESTPOOL/vdev_file.{0,1,2,3} +save_tunable ANYRAID_MIN_TILE_SIZE +set_tunable64 ANYRAID_MIN_TILE_SIZE 1073741824 + +log_assert "'zpool attach' works to expand mirrors and anyraid vdevs" + +log_must create_pool $TESTPOOL2 /$TESTPOOL/vdev_file.0 +log_must zpool attach $TESTPOOL2 /$TESTPOOL/vdev_file.0 /$TESTPOOL/vdev_file.1 +log_must eval "zpool list -v $TESTPOOL2 | grep \" mirror\"" +log_must eval "zpool list -v $TESTPOOL2 | grep \" .*_file.0\"" +log_must eval "zpool list -v $TESTPOOL2 | grep \" .*_file.1\"" +log_must zpool attach $TESTPOOL2 /$TESTPOOL/vdev_file.0 /$TESTPOOL/vdev_file.2 +log_must eval "zpool list -v $TESTPOOL2 | grep \" .*_file.2\"" +log_must zpool destroy $TESTPOOL2 + +log_must create_pool $TESTPOOL2 anyraid1 /$TESTPOOL/vdev_file.{0,1,2} +log_must zpool attach $TESTPOOL2 anyraid-0 /$TESTPOOL/vdev_file.3 +log_must eval "zpool list -v $TESTPOOL2 | grep \" .*_file.3\"" + +log_pass "'zpool attach' works to expand mirrors and anyraid vdevs" diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_attach/zpool_attach_003_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_attach/zpool_attach_003_pos.ksh new file mode 100755 index 000000000000..67ac4c2e7bb3 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_attach/zpool_attach_003_pos.ksh @@ -0,0 +1,99 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2025 Klara, Inc. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# 'zpool attach' expands size correctly with anyraid vdevs. +# +# STRATEGY: +# 1. Create an anyraid1 vdev with small disks +# 2. Attach larger disk +# 3. Verify that not all the new space can be used +# 4. 
Attach another larger disk +# 5. Verify that all space is now usable +# 6. Repeat steps 1-5 with anyraid2 +# + +verify_runnable "global" + +cleanup() { + log_must zpool destroy $TESTPOOL2 + rm /$TESTPOOL/vdev_file.* + restore_tunable ANYRAID_MIN_TILE_SIZE +} + +log_onexit cleanup + +log_must truncate -s 512M /$TESTPOOL/vdev_file.{0,1,2,3} +log_must truncate -s 2G /$TESTPOOL/vdev_file.{4,5,6} +save_tunable ANYRAID_MIN_TILE_SIZE +set_tunable64 ANYRAID_MIN_TILE_SIZE 67108864 + +log_assert "'zpool attach' expands size correctly with anyraid vdevs" + +log_must create_pool $TESTPOOL2 anyraid1 /$TESTPOOL/vdev_file.{0,1,2} + +cap=$(zpool get -Hp -o value size $TESTPOOL2) +log_must zpool attach $TESTPOOL2 anyraid1-0 /$TESTPOOL/vdev_file.4 +new_cap=$(zpool get -Hp -o value size $TESTPOOL2) +new_cap=$((new_cap - cap)) + +[[ "$new_cap" -eq $((3 * 64 * 1024 * 1024)) ]] || \ + log_fail "Incorrect space added on attach: $new_cap" + +log_must zpool attach $TESTPOOL2 anyraid1-0 /$TESTPOOL/vdev_file.5 +new_cap=$(zpool get -Hp -o value size $TESTPOOL2) +new_cap=$((new_cap - cap)) +[[ "$new_cap" -eq $(((2048 - 256 - 64) * 1024 * 1024)) ]] || \ + log_fail "Incorrect space added on attach: $new_cap" + +log_must zpool destroy $TESTPOOL2 +log_must create_pool $TESTPOOL2 anyraid2 /$TESTPOOL/vdev_file.{0,1,2,3} + +cap=$(zpool get -Hp -o value size $TESTPOOL2) +log_must zpool attach $TESTPOOL2 anyraid2-0 /$TESTPOOL/vdev_file.4 +new_cap=$(zpool get -Hp -o value size $TESTPOOL2) +new_cap=$((new_cap - cap)) + +[[ "$new_cap" -eq $((64 * 1024 * 1024)) ]] || \ + log_fail "Incorrect space added on attach: $new_cap" + +log_must zpool attach $TESTPOOL2 anyraid2-0 /$TESTPOOL/vdev_file.5 +new_cap=$(zpool get -Hp -o value size $TESTPOOL2) +new_cap=$((new_cap - cap)) +[[ "$new_cap" -eq $((256 * 1024 * 1024)) ]] || \ + log_fail "Incorrect space added on attach: $new_cap" + +log_must zpool attach $TESTPOOL2 anyraid2-0 /$TESTPOOL/vdev_file.6 +new_cap=$(zpool get -Hp -o value size $TESTPOOL2) +new_cap=$((new_cap - cap)) +[[ "$new_cap" -eq $(((2048 - 256 - 64) * 1024 * 1024)) ]] || \ + log_fail "Incorrect space added on attach: $new_cap" + +log_pass "'zpool attach' expands size correctly with anyraid vdevs" diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_create/cleanup.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_create/cleanup.ksh index f504d15fc0c3..428c769444cf 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_create/cleanup.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/cleanup.ksh @@ -34,5 +34,7 @@ . 
$STF_SUITE/tests/functional/cli_root/zpool_create/zpool_create.shlib
 
 cleanup_devices $DISKS
+delete_sparse_files
+rm -rf $TESTDIR $TESTDIR1
 
 log_pass
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create.shlib b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create.shlib
index ecab30ed3925..bbe68f8db24f 100644
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create.shlib
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create.shlib
@@ -36,7 +36,7 @@
 # Given a pool vdevs list, create the pool,verify the created pool,
 # and destroy the pool
 # $1, pool name
-# $2, pool type, mirror, raidz, or none
+# $2, pool type, mirror, raidz, anyraid, draid or none
 # $3, vdevs list
 #
 function create_pool_test
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_001_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_001_pos.ksh
index ad30c0fc87f9..16a98864e138 100755
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_001_pos.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_001_pos.ksh
@@ -49,8 +49,6 @@ verify_runnable "global"
 function cleanup
 {
 	poolexists $TESTPOOL && destroy_pool $TESTPOOL
-
-	rm -f $disk1 $disk2
 }
 
 log_assert "'zpool create ...' can successfully create" \
@@ -58,16 +56,16 @@
 
 log_onexit cleanup
 
-typeset disk1=$(create_blockfile $FILESIZE)
-typeset disk2=$(create_blockfile $FILESIZE)
+create_sparse_files "disk" 4 $MINVDEVSIZE2
 
 pooldevs="${DISK0} \
 	\"${DISK0} ${DISK1}\" \
 	\"${DISK0} ${DISK1} ${DISK2}\" \
-	\"$disk1 $disk2\""
+	\"$disk0 $disk1\""
 mirrordevs="\"${DISK0} ${DISK1}\" \
 	$raidzdevs \
-	\"$disk1 $disk2\""
+	\"$disk0 $disk1\""
+anyraiddevs="\"$disk0 $disk1 $disk2 $disk3\""
 raidzdevs="\"${DISK0} ${DISK1} ${DISK2}\""
 draiddevs="\"${DISK0} ${DISK1} ${DISK2}\""
 
@@ -75,6 +73,11 @@ create_pool_test "$TESTPOOL" "" "$pooldevs"
 create_pool_test "$TESTPOOL" "mirror" "$mirrordevs"
 create_pool_test "$TESTPOOL" "raidz" "$raidzdevs"
 create_pool_test "$TESTPOOL" "raidz1" "$raidzdevs"
+create_pool_test "$TESTPOOL" "anyraid" "$anyraiddevs"
+create_pool_test "$TESTPOOL" "anyraid0" "$anyraiddevs"
+create_pool_test "$TESTPOOL" "anyraid1" "$anyraiddevs"
+create_pool_test "$TESTPOOL" "anyraid2" "$anyraiddevs"
+create_pool_test "$TESTPOOL" "anyraid3" "$anyraiddevs"
 create_pool_test "$TESTPOOL" "draid" "$draiddevs"
 
 log_pass "'zpool create ...' success."
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_005_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_005_pos.ksh
index f0c2e69a0c0f..b7f3041342ff 100755
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_005_pos.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_005_pos.ksh
@@ -55,7 +55,7 @@ log_assert "'zpool create [-R root][-m mountpoint] ...' can create " \
 	"an alternate pool or a new pool mounted at the specified mountpoint."
log_onexit cleanup -set -A pooltype "" "mirror" "raidz" "raidz1" "raidz2" "draid" "draid2" +set -A pooltype "" "mirror" "raidz" "raidz1" "raidz2" "anyraid" "anyraid0" "anyraid1" "anyraid2" "anyraid3" "draid" "draid2" # # cleanup the pools created in previous case if zpool_create_004_pos timedout @@ -69,7 +69,7 @@ rm -rf $TESTDIR log_must mkdir -p $TESTDIR typeset -i i=1 while (( i < 5 )); do - log_must truncate -s $FILESIZE $TESTDIR/file.$i + log_must truncate -s $MINVDEVSIZE2 $TESTDIR/file.$i (( i = i + 1 )) done diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_006_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_006_pos.ksh index adc47c48de28..5d15fec2707c 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_006_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_006_pos.ksh @@ -60,7 +60,7 @@ mntpnt=$(get_prop mountpoint $TESTPOOL) typeset -i i=0 while ((i < 10)); do - log_must truncate -s $MINVDEVSIZE $mntpnt/vdev$i + log_must truncate -s $MINVDEVSIZE2 $mntpnt/vdev$i eval vdev$i=$mntpnt/vdev$i ((i += 1)) @@ -98,6 +98,11 @@ set -A valid_args \ "raidz2 $vdev0 $vdev1 $vdev2 spare $vdev3 raidz2 $vdev4 $vdev5 $vdev6" \ "raidz3 $vdev0 $vdev1 $vdev2 $vdev3 \ mirror $vdev4 $vdev5 $vdev6 $vdev7" \ + "anyraid0 $vdev0" \ + "anyraid0 $vdev0 $vdev1 anyraid0 $vdev2 $vdev3" \ + "anyraid1 $vdev0 $vdev1 anyraid1 $vdev2 $vdev3" \ + "anyraid2 $vdev0 $vdev1 $vdev2 anyraid2 $vdev3 $vdev4 $vdev5" \ + "anyraid3 $vdev0 $vdev1 $vdev2 $vdev3 anyraid3 $vdev4 $vdev5 $vdev6 $vdev7" \ "draid $vdev0 $vdev1 $vdev2 mirror $vdev3 $vdev4" \ "draid $vdev0 $vdev1 $vdev2 raidz1 $vdev3 $vdev4 $vdev5" \ "draid $vdev0 $vdev1 $vdev2 draid1 $vdev3 $vdev4 $vdev5" \ @@ -133,6 +138,10 @@ set -A forced_args \ spare $vdev4 raidz2 $vdev5 $vdev6 $vdev7" \ "mirror $vdev0 $vdev1 draid $vdev2 $vdev3 $vdev4 \ draid2 $vdev5 $vdev6 $vdev7 $vdev8 spare $vdev9" \ + "anyraid0 $vdev0 anyraid $vdev1 $vdev2" \ + "anyraid1 $vdev0 $vdev1 anyraid2 $vdev2 $vdev3 $vdev4" \ + "anyraid2 $vdev0 $vdev1 $vdev2 $vdev3 anyraid2 $vdev4 $vdev5 $vdev6" \ + "anyraid3 $vdev0 $vdev1 $vdev2 $vdev3 anyraid0 $vdev4" \ "draid $vdev0 $vdev1 $vdev2 $vdev3 \ draid2 $vdev4 $vdev5 $vdev6 $vdev7 $vdev8" \ "draid $vdev0 $vdev1 $vdev2 draid $vdev4 $vdev5 $vdev6 \ diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_007_neg.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_007_neg.ksh index 2e377bc3b522..82bbf79441b2 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_007_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_007_neg.ksh @@ -56,6 +56,11 @@ set -A args "" "-?" "-n" "-f" "-nf" "-fn" "-f -n" "--f" "-e" "-s" \ "$TESTPOOL mirror" "$TESTPOOL raidz" "$TESTPOOL mirror raidz" \ "$TESTPOOL raidz1" "$TESTPOOL mirror raidz1" \ "$TESTPOOL draid1" "$TESTPOOL mirror draid1" \ + "$TESTPOOL anyraid" "$TESTPOOL mirror anyraid" \ + "$TESTPOOL anyraid0" "$TESTPOOL mirror anyraid0" \ + "$TESTPOOL anyraid1 $DISK0" \ + "$TESTPOOL anyraid2 $DISK0 $DISK1" \ + "$TESTPOOL anyraid3 $DISK0 $DISK1 $DISK2" \ "$TESTPOOL mirror c?t?d?" "$TESTPOOL mirror $DISK0 c0t1d?" 
\ "$TESTPOOL RAIDZ $DISK0 $DISK1" \ "$TESTPOOL $DISK0 log $DISK1 log $DISK2" \ diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_009_neg.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_009_neg.ksh index 7656f5bb4fdf..fce791caaf3d 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_009_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_009_neg.ksh @@ -39,10 +39,12 @@ # devices, 'zpool create' should failed. # # STRATEGY: -# 1. Loop to create the following three kinds of pools. +# 1. Loop to create the following kinds of pools: # - Regular pool # - Mirror # - Raidz +# - AnyRAID +# - dRAID # 2. Create two pools but using the same disks, expect failed. # 3. Create one pool but using the same disks twice, expect failed. # @@ -62,13 +64,15 @@ log_assert "Create a pool with same devices twice or create two pools with " \ "same devices, 'zpool create' should fail." log_onexit cleanup +create_sparse_files "file" 4 $MINVDEVSIZE2 + unset NOINUSE_CHECK typeset opt -for opt in "" "mirror" "raidz" "draid"; do +for opt in "" "mirror" "raidz" "anyraid" "draid"; do if [[ $opt == "" ]]; then - typeset disks=$DISK0 + typeset disks=$file0 else - typeset disks=$DISKS + typeset disks=$files fi # Create two pools but using the same disks. @@ -78,7 +82,7 @@ for opt in "" "mirror" "raidz" "draid"; do # Create two pools and part of the devices were overlapped create_pool $TESTPOOL $opt $disks - log_mustnot zpool create -f $TESTPOOL1 $opt $DISK0 + log_mustnot zpool create -f $TESTPOOL1 $opt $file0 destroy_pool $TESTPOOL # Create one pool but using the same disks twice. diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_010_neg.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_010_neg.ksh index 6d43227481bf..c08ba6afdaca 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_010_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_010_neg.ksh @@ -73,6 +73,7 @@ set -A args \ "$TOOSMALL $TESTDIR/file1" "$TESTPOOL1 $TESTDIR/file1 $TESTDIR/file2" \ "$TOOSMALL mirror $TESTDIR/file1 $TESTDIR/file2" \ "$TOOSMALL raidz $TESTDIR/file1 $TESTDIR/file2" \ + "$TOOSMALL anyraid0 $TESTDIR/file1" \ "$TOOSMALL draid $TESTDIR/file1 $TESTDIR/file2 $TESTDIR/file3" typeset -i i=0 diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_anyraid_001_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_anyraid_001_pos.ksh new file mode 100755 index 000000000000..544d5c715fe7 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_anyraid_001_pos.ksh @@ -0,0 +1,63 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2025, Klara, Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# Create a variety of AnyRAID pools using the minimal vdev syntax.
+#
+# STRATEGY:
+# 1. Create the required number of sparse file vdevs.
+# 2. Create a few pools of each parity level using the anyraid* syntax.
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+	poolexists $TESTPOOL && destroy_pool $TESTPOOL
+}
+
+log_assert "'zpool create ...' can create a pool."
+log_onexit cleanup
+
+create_sparse_files "disk" 4 $MINVDEVSIZE2
+
+# Verify the default parity
+log_must zpool create $TESTPOOL anyraid $disks
+log_must poolexists $TESTPOOL
+destroy_pool $TESTPOOL
+
+# Verify specified parity
+for parity in {0..3}; do
+	log_must zpool create $TESTPOOL anyraid$parity $disks
+	log_must poolexists $TESTPOOL
+	destroy_pool $TESTPOOL
+done
+
+log_pass "'zpool create ...' can create a pool."
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_anyraid_002_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_anyraid_002_pos.ksh
new file mode 100755
index 000000000000..a5d0eb0928bd
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_anyraid_002_pos.ksh
@@ -0,0 +1,69 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2025, Klara, Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# Create an AnyRAID pool using the maximum number of vdevs (255). Then verify
+# that creating a pool with 256 fails as expected.
+#
+# STRATEGY:
+# 1. Create 256 sparse file vdevs to draw from.
+# 2. Verify pools with a valid number of vdevs succeed.
+# 3. Verify a pool which exceeds the maximum number of vdevs fails.
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+	poolexists $TESTPOOL && destroy_pool $TESTPOOL
+
+	rm -f $all_vdevs
+	rmdir $TESTDIR
+}
+
+log_assert "'zpool create anyraid ...' can create a pool with maximum number of vdevs."
+log_onexit cleanup
+
+all_vdevs=$(echo $TESTDIR/file.{01..256})
+
+mkdir $TESTDIR
+log_must truncate -s $MINVDEVSIZE2 $all_vdevs
+
+# Verify pool creation with 254 and 255 vdevs.
+for (( i=254; i<=255; i++ )); do
+	log_must zpool create $TESTPOOL anyraid3 \
+		$(echo $TESTDIR/file.{01..$i})
+	log_must destroy_pool $TESTPOOL
+done
+
+# 256 vdevs exceeds the maximum AnyRAID vdev count (255).
+log_mustnot zpool create $TESTPOOL anyraid3 $(echo $TESTDIR/file.{01..256})
+
+log_pass "'zpool create anyraid ...' 
can create a pool with maximum number of vdevs." diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_anyraid_003_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_anyraid_003_pos.ksh new file mode 100755 index 000000000000..7ee7c304eb6f --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_anyraid_003_pos.ksh @@ -0,0 +1,61 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2025, Klara, Inc. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zpool_create/zpool_create.shlib + +# +# DESCRIPTION: +# Verify that AnyRAID vdev can be created using disks of different sizes. +# +# STRATEGY: +# 1. Create a pool using disks of different sizes. +# 2. Verify the pool created successfully. +# + +verify_runnable "global" + +function cleanup +{ + poolexists $TESTPOOL && destroy_pool $TESTPOOL +} + +log_assert "'zpool create anyraid* ...' can create a pool with disks of various sizes." +log_onexit cleanup + +create_sparse_files "Adisk" 3 $(( $MINVDEVSIZE2 * 1 )) +create_sparse_files "Bdisk" 2 $(( $MINVDEVSIZE2 * 2 )) +create_sparse_files "Cdisk" 1 $(( $MINVDEVSIZE2 * 3 )) +ls -lh $Adisks $Bdisks $Cdisks + +for parity in {0..3}; do + log_must zpool create $TESTPOOL anyraid$parity $Cdisks $Adisks $Bdisks + log_must poolexists $TESTPOOL + destroy_pool $TESTPOOL +done + +log_pass "'zpool create anyraid* ...' can create a pool with disks of various sizes." diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_export/cleanup.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_export/cleanup.ksh index 66de31744a96..5dce6bec18fd 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_export/cleanup.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_export/cleanup.ksh @@ -28,4 +28,5 @@ . $STF_SUITE/include/libtest.shlib +delete_sparse_files default_cleanup diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_export/zpool_export_anyraid_001_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_export/zpool_export_anyraid_001_pos.ksh new file mode 100755 index 000000000000..8f3db4b3e424 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_export/zpool_export_anyraid_001_pos.ksh @@ -0,0 +1,61 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. 
+# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2025, Klara, Inc. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# An AnyRAID pool should be exportable and not visible from 'zpool list'. +# +# STRATEGY: +# 1. Create AnyRAID pool. +# 2. Export the pool. +# 3. Verify the pool is no longer present in the list output. +# + +verify_runnable "global" + +function cleanup +{ + poolexists $TESTPOOL && destroy_pool $TESTPOOL +} + +log_assert "Verify an AnyRAID pool can be exported." +log_onexit cleanup + +poolexists $TESTPOOL && destroy_pool $TESTPOOL + +create_sparse_files "disk" 4 $MINVDEVSIZE2 + +log_must zpool create $TESTPOOL anyraid3 $disks +log_must poolexists $TESTPOOL +log_must zpool export $TESTPOOL + +poolexists $TESTPOOL && \ + log_fail "$TESTPOOL unexpectedly found in 'zpool list' output." + +log_pass "Successfully exported an AnyRAID pool." diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg b/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg index bdf5fdf85cff..05dde1fea8dd 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg @@ -93,6 +93,7 @@ typeset -a properties=( "feature@redaction_list_spill" "feature@dynamic_gang_header" "feature@physical_rewrite" + "feature@anyraid" ) if is_linux || is_freebsd; then diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_010_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_010_pos.ksh index ce1c103cd3c3..841b8693ec16 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_010_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_010_pos.ksh @@ -43,15 +43,18 @@ # 3. Create a draid2 pool C with dev2/3/4/5. Then destroy it. # 4. Create a raidz pool D with dev3/4. Then destroy it. # 5. Create a stripe pool E with dev4. Then destroy it. -# 6. Verify 'zpool import -D -a' recover all the pools. +# 6. Create an anyraid pool F with dev6. Then destroy it. +# 7. Verify 'zpool import -D -a' recover all the pools. # verify_runnable "global" +VDEV6="$DEVICE_DIR/disk6_anyraid" + function cleanup { typeset dt - for dt in $poolE $poolD $poolC $poolB $poolA; do + for dt in $poolF $poolE $poolD $poolC $poolB $poolA; do destroy_pool $dt done @@ -67,7 +70,7 @@ log_assert "'zpool -D -a' can import all the specified directories " \ "destroyed pools." 
log_onexit cleanup -poolA=poolA.$$; poolB=poolB.$$; poolC=poolC.$$; poolD=poolD.$$; poolE=poolE.$$ +poolA=poolA.$$; poolB=poolB.$$; poolC=poolC.$$; poolD=poolD.$$; poolE=poolE.$$; poolF=poolF.$$; log_must zpool create $poolA mirror $VDEV0 $VDEV1 $VDEV2 $VDEV3 $VDEV4 log_must zpool destroy $poolA @@ -84,9 +87,13 @@ log_must zpool destroy $poolD log_must zpool create $poolE $VDEV4 log_must zpool destroy $poolE +truncate -s 24G $VDEV6 +log_must zpool create $poolF anyraid0 $VDEV6 +log_must zpool destroy $poolF + log_must zpool import -d $DEVICE_DIR -D -f -a -for dt in $poolA $poolB $poolC $poolD $poolE; do +for dt in $poolA $poolB $poolC $poolD $poolE $poolF; do log_must datasetexists $dt done diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/cleanup.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/cleanup.ksh index a3beee135954..b4204014d573 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/cleanup.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/cleanup.ksh @@ -29,4 +29,6 @@ verify_runnable "global" +restore_tunable ANYRAID_MIN_TILE_SIZE + default_cleanup diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/setup.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/setup.ksh new file mode 100755 index 000000000000..1210475b12f7 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/setup.ksh @@ -0,0 +1,35 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2025, Klara, Inc. +# + +. $STF_SUITE/include/libtest.shlib + +verify_runnable "global" + +save_tunable ANYRAID_MIN_TILE_SIZE +set_tunable64 ANYRAID_MIN_TILE_SIZE 67108864 + +log_pass diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_anyraid_attach.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_anyraid_attach.ksh new file mode 100755 index 000000000000..9c6959e913ef --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_anyraid_attach.ksh @@ -0,0 +1,56 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. 
+# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2016 by Delphix. All rights reserved. +# +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zpool_initialize/zpool_initialize.kshlib + +# +# DESCRIPTION: +# Attaching data devices works with initializing for AnyRAID1. +# +# STRATEGY: +# 1. Create an AnyRAID1 pool. +# 2. Start initializing of the first disk. +# 3. Attach a third disk, ensure initializing continues. +# + +DISK1="$(echo $DISKS | cut -d' ' -f1)" +DISK2="$(echo $DISKS | cut -d' ' -f2)" +DISK3="$(echo $DISKS | cut -d' ' -f3)" + +log_must zpool create -f $TESTPOOL anyraid1 $DISK1 $DISK2 + +log_must zpool initialize $TESTPOOL $DISK1 +progress="$(initialize_progress $TESTPOOL $DISK1)" +[[ -z "$progress" ]] && log_fail "Initializing did not start" + +log_must zpool attach $TESTPOOL anyraid1-0 $DISK3 +new_progress="$(initialize_progress $TESTPOOL $DISK1)" +[[ "$progress" -le "$new_progress" ]] || \ + log_fail "Lost initializing progress on AnyRAID1 attach" +progress="$new_progress" + +log_pass "Attaching data devices works with initializing for AnyRAID1" diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_fault_export_import_online.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_fault_export_import_online.ksh index 26c369be5bee..48a86d5f9400 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_fault_export_import_online.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_fault_export_import_online.ksh @@ -30,31 +30,42 @@ # 1. Create a pool with a two-way mirror. # 2. Start initializing, fault, export, import, online and verify along # the way that the initializing was cancelled and not restarted. +# 3. Repeat for AnyRAID1. 
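+#    (The AnyRAID1 pass first writes and then deletes a file, presumably
+#    so that tiles are allocated before initializing starts.)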
# DISK1="$(echo $DISKS | cut -d' ' -f1)" DISK2="$(echo $DISKS | cut -d' ' -f2)" -log_must zpool create -f $TESTPOOL mirror $DISK1 $DISK2 +for type in "mirror" "anyraid1"; do + log_must zpool create -f $TESTPOOL $type $DISK1 $DISK2 + if [[ "$type" == "anyraid1" ]]; then + log_must dd if=/dev/urandom of=/$TESTPOOL/f1 bs=1M count=2k + log_must zpool sync + log_must rm /$TESTPOOL/f1 + fi -log_must zpool initialize $TESTPOOL $DISK1 -progress="$(initialize_progress $TESTPOOL $DISK1)" -[[ -z "$progress" ]] && log_fail "Initializing did not start" + log_must zpool initialize $TESTPOOL $DISK1 + progress="$(initialize_progress $TESTPOOL $DISK1)" + [[ -z "$progress" ]] && log_fail "Initializing did not start" -log_must zpool offline -f $TESTPOOL $DISK1 -log_must check_vdev_state $TESTPOOL $DISK1 "FAULTED" -log_must eval "zpool status -i $TESTPOOL | grep $DISK1 | grep uninitialized" + log_must zpool offline -f $TESTPOOL $DISK1 + log_must zpool sync $TESTPOOL + log_must check_vdev_state $TESTPOOL $DISK1 "FAULTED" + log_must eval "zpool status -i $TESTPOOL | grep $DISK1 | grep uninitialized" -log_must zpool export $TESTPOOL -log_must zpool import $TESTPOOL + log_must zpool export $TESTPOOL + log_must zpool import $TESTPOOL -log_must check_vdev_state $TESTPOOL $DISK1 "FAULTED" -log_must eval "zpool status -i $TESTPOOL | grep $DISK1 | grep uninitialized" + log_must check_vdev_state $TESTPOOL $DISK1 "FAULTED" + log_must eval "zpool status -i $TESTPOOL | grep $DISK1 | grep uninitialized" -log_must zpool online $TESTPOOL $DISK1 -log_must zpool clear $TESTPOOL $DISK1 -log_must check_vdev_state $TESTPOOL $DISK1 "ONLINE" -log_must eval "zpool status -i $TESTPOOL | grep $DISK1 | grep uninitialized" + log_must zpool online $TESTPOOL $DISK1 + log_must zpool clear $TESTPOOL $DISK1 + log_must check_vdev_state $TESTPOOL $DISK1 "ONLINE" + log_must eval "zpool status -i $TESTPOOL | grep $DISK1 | grep uninitialized" + + poolexists $TESTPOOL && destroy_pool $TESTPOOL +done log_pass "Initializing behaves as expected at each step of:" \ "initialize + fault + export + import + online" diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_import_export.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_import_export.ksh index 341f4f75cf7d..f8709875c757 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_import_export.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_import_export.ksh @@ -32,7 +32,7 @@ # Initializing automatically resumes across import/export. # # STRATEGY: -# 1. Create a one-disk pool. +# 1. Create a pool. # 2. Start initializing and verify that initializing is active. # 3. Export the pool. # 4. Import the pool. @@ -40,40 +40,52 @@ # 6. Suspend initializing. # 7. Repeat steps 3-4. # 8. Verify that progress does not regress but initializing is still suspended. +# 9. Repeat for other VDEV types. 
# -DISK1=${DISKS%% *} +DISK1="$(echo $DISKS | cut -d' ' -f1)" +DISK2="$(echo $DISKS | cut -d' ' -f2)" -log_must zpool create -f $TESTPOOL $DISK1 -log_must zpool initialize $TESTPOOL +for type in "" "anyraid1"; do + if [[ "$type" = "" ]]; then + VDEVS="$DISK1" + elif [[ "$type" = "anyraid1" ]]; then + VDEVS="$DISK1 $DISK2" + fi -sleep 2 + log_must zpool create -f $TESTPOOL $type $VDEVS + log_must zpool initialize $TESTPOOL -progress="$(initialize_progress $TESTPOOL $DISK1)" -[[ -z "$progress" ]] && log_fail "Initializing did not start" + sleep 2 -log_must zpool export $TESTPOOL -log_must zpool import $TESTPOOL + progress="$(initialize_progress $TESTPOOL $DISK1)" + [[ -z "$progress" ]] && log_fail "Initializing did not start" -new_progress="$(initialize_progress $TESTPOOL $DISK1)" -[[ -z "$new_progress" ]] && log_fail "Initializing did not restart after import" -[[ "$progress" -le "$new_progress" ]] || \ - log_fail "Initializing lost progress after import" -log_mustnot eval "initialize_prog_line $TESTPOOL $DISK1 | grep suspended" + log_must zpool export $TESTPOOL + log_must zpool import $TESTPOOL -log_must zpool initialize -s $TESTPOOL $DISK1 -action_date="$(initialize_prog_line $TESTPOOL $DISK1 | \ - sed 's/.*ed at \(.*\)).*/\1/g')" -log_must zpool export $TESTPOOL -log_must zpool import $TESTPOOL -new_action_date=$(initialize_prog_line $TESTPOOL $DISK1 | \ - sed 's/.*ed at \(.*\)).*/\1/g') -[[ "$action_date" != "$new_action_date" ]] && \ - log_fail "Initializing action date did not persist across export/import" + new_progress="$(initialize_progress $TESTPOOL $DISK1)" + [[ -z "$new_progress" ]] && log_fail "Initializing did not restart after import" + [[ "$progress" -le "$new_progress" ]] || \ + log_fail "Initializing lost progress after import" + log_mustnot eval "initialize_prog_line $TESTPOOL $DISK1 | grep suspended" -[[ "$new_progress" -le "$(initialize_progress $TESTPOOL $DISK1)" ]] || \ - log_fail "Initializing lost progress after import" + log_must zpool initialize -s $TESTPOOL $DISK1 + action_date="$(initialize_prog_line $TESTPOOL $DISK1 | \ + sed 's/.*ed at \(.*\)).*/\1/g')" + log_must zpool export $TESTPOOL + log_must zpool import $TESTPOOL + new_action_date=$(initialize_prog_line $TESTPOOL $DISK1 | \ + sed 's/.*ed at \(.*\)).*/\1/g') + [[ "$action_date" != "$new_action_date" ]] && \ + log_fail "Initializing action date did not persist across export/import" -log_must eval "initialize_prog_line $TESTPOOL $DISK1 | grep suspended" + [[ "$new_progress" -le "$(initialize_progress $TESTPOOL $DISK1)" ]] || \ + log_fail "Initializing lost progress after import" + + log_must eval "initialize_prog_line $TESTPOOL $DISK1 | grep suspended" + + poolexists $TESTPOOL && destroy_pool $TESTPOOL +done log_pass "Initializing retains state as expected across export/import" diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_offline_export_import_online.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_offline_export_import_online.ksh index 89eace601577..419aea25c91b 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_offline_export_import_online.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_offline_export_import_online.ksh @@ -36,32 +36,45 @@ # 2. Start initializing, offline, export, import, online and verify that # initializing state is preserved / initializing behaves as expected # at each step. +# 3. Repeat for other VDEV types. 
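+#    (The AnyRAID pass adds a third disk to the pool; the progress
+#    checks still track $DISK1 in both passes.)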
# DISK1="$(echo $DISKS | cut -d' ' -f1)" DISK2="$(echo $DISKS | cut -d' ' -f2)" +DISK3="$(echo $DISKS | cut -d' ' -f3)" -log_must zpool create -f $TESTPOOL mirror $DISK1 $DISK2 +for type in "mirror" "anyraid1"; do -log_must zpool initialize $TESTPOOL $DISK1 -log_must zpool offline $TESTPOOL $DISK1 -progress="$(initialize_progress $TESTPOOL $DISK1)" -[[ -z "$progress" ]] && log_fail "Initializing did not start" -log_mustnot eval "initialize_prog_line $TESTPOOL $DISK1 | grep suspended" + if [[ "$type" =~ "anyraid" ]]; then + export disks="$DISK1 $DISK2 $DISK3" + else + export disks="$DISK1 $DISK2" + fi + log_must zpool create -f $TESTPOOL $type $disks -log_must zpool export $TESTPOOL -log_must zpool import $TESTPOOL + log_must zpool initialize $TESTPOOL $DISK1 + log_must zpool offline $TESTPOOL $DISK1 + progress="$(initialize_progress $TESTPOOL $DISK1)" + [[ -z "$progress" ]] && log_fail "Initializing did not start" + log_mustnot eval "initialize_prog_line $TESTPOOL $DISK1 | grep suspended" -new_progress="$(initialize_progress $TESTPOOL $DISK1)" -[[ -z "$new_progress" ]] && log_fail "Initializing did not start after import" -[[ "$new_progress" -ge "$progress" ]] || \ - log_fail "Initializing lost progress after import" -log_mustnot eval "initialize_prog_line $TESTPOOL $DISK1 | grep suspended" + log_must zpool export $TESTPOOL + log_must zpool import $TESTPOOL -log_must zpool online $TESTPOOL $DISK1 -new_progress="$(initialize_progress $TESTPOOL $DISK1)" -[[ "$new_progress" -ge "$progress" ]] || \ - log_fail "Initializing lost progress after online" + new_progress="$(initialize_progress $TESTPOOL $DISK1)" + [[ -z "$new_progress" ]] && log_fail "Initializing did not start after import" + [[ "$new_progress" -ge "$progress" ]] || \ + log_fail "Initializing lost progress after import" + log_mustnot eval "initialize_prog_line $TESTPOOL $DISK1 | grep suspended" + + log_must zpool online $TESTPOOL $DISK1 + new_progress="$(initialize_progress $TESTPOOL $DISK1)" + [[ "$new_progress" -ge "$progress" ]] || \ + log_fail "Initializing lost progress after online" + + poolexists $TESTPOOL && destroy_pool $TESTPOOL + +done log_pass "Initializing behaves as expected at each step of:" \ "initialize + offline + export + import + online" diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_online_offline.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_online_offline.ksh index 10721c1f6cb2..91b1d39ce4e4 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_online_offline.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_online_offline.ksh @@ -39,37 +39,53 @@ # 5. Verify that initializing resumes and progress does not regress. # 6. Suspend initializing. # 7. Repeat steps 3-4 and verify that initializing does not resume. +# 8. 
Repeat the scenario for other VDEVs # DISK1=${DISKS%% *} DISK2="$(echo $DISKS | cut -d' ' -f2)" +DISK3="$(echo $DISKS | cut -d' ' -f3)" -log_must zpool create -f $TESTPOOL mirror $DISK1 $DISK2 -log_must zpool initialize $TESTPOOL $DISK1 +for type in "mirror" "anyraid1"; do -log_must zpool offline $TESTPOOL $DISK1 + if [[ "$type" == "mirror" ]]; then + log_must zpool create -f $TESTPOOL $type $DISK1 $DISK2 + else + log_must zpool create -f $TESTPOOL $type $DISK1 $DISK2 $DISK3 + log_must dd if=/dev/urandom of=/$TESTPOOL/f1 bs=1M count=400 + log_must zpool sync + log_must rm /$TESTPOOL/f1 + log_must zpool sync + fi + log_must zpool initialize $TESTPOOL $DISK1 -progress="$(initialize_progress $TESTPOOL $DISK1)" -[[ -z "$progress" ]] && log_fail "Initializing did not start" + log_must zpool offline $TESTPOOL $DISK1 -log_must zpool online $TESTPOOL $DISK1 + progress="$(initialize_progress $TESTPOOL $DISK1)" + [[ -z "$progress" ]] && log_fail "Initializing did not start" -new_progress="$(initialize_progress $TESTPOOL $DISK1)" -[[ -z "$new_progress" ]] && \ - log_fail "Initializing did not restart after onlining" -[[ "$progress" -le "$new_progress" ]] || \ - log_fail "Initializing lost progress after onlining" -log_mustnot eval "initialize_prog_line $TESTPOOL $DISK1 | grep suspended" + log_must zpool online $TESTPOOL $DISK1 -log_must zpool initialize -s $TESTPOOL $DISK1 -action_date="$(initialize_prog_line $TESTPOOL $DISK1 | \ - sed 's/.*ed at \(.*\)).*/\1/g')" -log_must zpool offline $TESTPOOL $DISK1 -log_must zpool online $TESTPOOL $DISK1 -new_action_date=$(initialize_prog_line $TESTPOOL $DISK1 | \ - sed 's/.*ed at \(.*\)).*/\1/g') -[[ "$action_date" != "$new_action_date" ]] && \ - log_fail "Initializing action date did not persist across offline/online" -log_must eval "initialize_prog_line $TESTPOOL $DISK1 | grep suspended" + new_progress="$(initialize_progress $TESTPOOL $DISK1)" + [[ -z "$new_progress" ]] && \ + log_fail "Initializing did not restart after onlining" + [[ "$progress" -le "$new_progress" ]] || \ + log_fail "Initializing lost progress after onlining" + log_mustnot eval "initialize_prog_line $TESTPOOL $DISK1 | grep suspended" + + log_must zpool initialize -s $TESTPOOL $DISK1 + action_date="$(initialize_prog_line $TESTPOOL $DISK1 | \ + sed 's/.*ed at \(.*\)).*/\1/g')" + log_must zpool offline $TESTPOOL $DISK1 + log_must zpool online $TESTPOOL $DISK1 + new_action_date=$(initialize_prog_line $TESTPOOL $DISK1 | \ + sed 's/.*ed at \(.*\)).*/\1/g') + [[ "$action_date" != "$new_action_date" ]] && \ + log_fail "Initializing action date did not persist across offline/online" + log_must eval "initialize_prog_line $TESTPOOL $DISK1 | grep suspended" + + poolexists $TESTPOOL && destroy_pool $TESTPOOL + +done log_pass "Initializing performs as expected across offline/online" diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_start_and_cancel_neg.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_start_and_cancel_neg.ksh index 79bf0b6a2d08..0dad44cf54bd 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_start_and_cancel_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_start_and_cancel_neg.ksh @@ -37,25 +37,37 @@ # 2. Start initializing and verify that initializing is active. # 3. Try to cancel and suspend initializing on the non-initializing disks. # 4. Try to re-initialize the currently initializing disk. +# 5. 
Repeat for other VDEVs # DISK1=${DISKS%% *} DISK2="$(echo $DISKS | cut -d' ' -f2)" DISK3="$(echo $DISKS | cut -d' ' -f3)" -log_must zpool list -v -log_must zpool create -f $TESTPOOL $DISK1 $DISK2 $DISK3 -log_must zpool initialize $TESTPOOL $DISK1 +for type in "" "anyraid2"; do -[[ -z "$(initialize_progress $TESTPOOL $DISK1)" ]] && \ - log_fail "Initialize did not start" + log_must zpool list -v + log_must zpool create -f $TESTPOOL $type $DISK1 $DISK2 $DISK3 + if [[ "$type" == "anyraid2" ]]; then + log_must dd if=/dev/urandom of=/$TESTPOOL/f1 bs=1M count=2k + log_must zpool sync + log_must rm /$TESTPOOL/f1 + fi + log_must zpool initialize $TESTPOOL $DISK1 -log_mustnot zpool initialize -c $TESTPOOL $DISK2 -log_mustnot zpool initialize -c $TESTPOOL $DISK2 $DISK3 + [[ -z "$(initialize_progress $TESTPOOL $DISK1)" ]] && \ + log_fail "Initialize did not start" -log_mustnot zpool initialize -s $TESTPOOL $DISK2 -log_mustnot zpool initialize -s $TESTPOOL $DISK2 $DISK3 + log_mustnot zpool initialize -c $TESTPOOL $DISK2 + log_mustnot zpool initialize -c $TESTPOOL $DISK2 $DISK3 -log_mustnot zpool initialize $TESTPOOL $DISK1 + log_mustnot zpool initialize -s $TESTPOOL $DISK2 + log_mustnot zpool initialize -s $TESTPOOL $DISK2 $DISK3 + + log_mustnot zpool initialize $TESTPOOL $DISK1 + + poolexists $TESTPOOL && destroy_pool $TESTPOOL + +done log_pass "Nonsensical initialize operations fail" diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_start_and_cancel_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_start_and_cancel_pos.ksh index f872246a0661..a4a91e8d122d 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_start_and_cancel_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_start_and_cancel_pos.ksh @@ -35,19 +35,26 @@ # 1. Create a one-disk pool. # 2. Start initializing and verify that initializing is active. # 3. Cancel initializing and verify that initializing is not active. +# 4. Repeat for other VDEVs # DISK1=${DISKS%% *} -log_must zpool create -f $TESTPOOL $DISK1 -log_must zpool initialize $TESTPOOL +for type in "" "anyraid0"; do -[[ -z "$(initialize_progress $TESTPOOL $DISK1)" ]] && \ - log_fail "Initialize did not start" + log_must zpool create -f $TESTPOOL $type $DISK1 + log_must zpool initialize $TESTPOOL -log_must zpool initialize -c $TESTPOOL + [[ -z "$(initialize_progress $TESTPOOL $DISK1)" ]] && \ + log_fail "Initialize did not start" -[[ -z "$(initialize_progress $TESTPOOL $DISK1)" ]] || \ - log_fail "Initialize did not stop" + log_must zpool initialize -c $TESTPOOL + + [[ -z "$(initialize_progress $TESTPOOL $DISK1)" ]] || \ + log_fail "Initialize did not stop" + + poolexists $TESTPOOL && destroy_pool $TESTPOOL + +done log_pass "Initialize start + cancel works" diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_uninit.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_uninit.ksh index 6c75146af6b7..22cb4fbd792c 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_uninit.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_uninit.ksh @@ -40,7 +40,8 @@ # b. Verify uninitialize fails when actively initializing. # c. Cancel or suspend initializing and verify that initializing is not active. # d. Verify uninitialize succeeds after being cancelled. -# 4. Verify per-disk cancel|suspend + uninit +# 4. 
Verify per-disk cancel|suspend + uninit. +# 5. Repeat for other VDEVs. # DISK1="$(echo $DISKS | cut -d' ' -f1)" @@ -78,65 +79,76 @@ function status_check_all # pool disk-state status_check "$pool" "$disk_state" "$disk_state" "$disk_state" } -# 1. Create a one-disk pool. -log_must zpool create -f $TESTPOOL $DISK1 $DISK2 $DISK3 -status_check_all $TESTPOOL "uninitialized" +for type in "" "anyraid1"; do -# 2. Verify uninitialize succeeds for uninitialized pool. -log_must zpool initialize -u $TESTPOOL -status_check_all $TESTPOOL "uninitialized" + # 1. Create a one-disk pool. + log_must zpool create -f $TESTPOOL $type $DISK1 $DISK2 $DISK3 + status_check_all $TESTPOOL "uninitialized" + if [[ "$type" == "anyraid1" ]]; then + log_must dd if=/dev/urandom of=/$TESTPOOL/f1 bs=1M count=2k + log_must zpool sync + log_must rm /$TESTPOOL/f1 + fi + + # 2. Verify uninitialize succeeds for uninitialized pool. + log_must zpool initialize -u $TESTPOOL + status_check_all $TESTPOOL "uninitialized" + + # 3. Verify pool wide cancel + uninit + log_must zpool initialize $TESTPOOL + status_check_all $TESTPOOL "[[:digit:]]* initialized" + + log_mustnot zpool initialize -u $TESTPOOL + status_check_all $TESTPOOL "[[:digit:]]* initialized" -# 3. Verify pool wide cancel + uninit -log_must zpool initialize $TESTPOOL -status_check_all $TESTPOOL "[[:digit:]]* initialized" + log_must zpool initialize -c $TESTPOOL + status_check_all $TESTPOOL "uninitialized" -log_mustnot zpool initialize -u $TESTPOOL -status_check_all $TESTPOOL "[[:digit:]]* initialized" + log_must zpool initialize -u $TESTPOOL + status_check_all $TESTPOOL "uninitialized" -log_must zpool initialize -c $TESTPOOL -status_check_all $TESTPOOL "uninitialized" + # 3. Verify pool wide suspend + uninit + log_must zpool initialize $TESTPOOL + status_check_all $TESTPOOL "[[:digit:]]* initialized" -log_must zpool initialize -u $TESTPOOL -status_check_all $TESTPOOL "uninitialized" + log_mustnot zpool initialize -u $TESTPOOL + status_check_all $TESTPOOL "[[:digit:]]* initialized" -# 3. Verify pool wide suspend + uninit -log_must zpool initialize $TESTPOOL -status_check_all $TESTPOOL "[[:digit:]]* initialized" + log_must zpool initialize -s $TESTPOOL + status_check_all $TESTPOOL "suspended" -log_mustnot zpool initialize -u $TESTPOOL -status_check_all $TESTPOOL "[[:digit:]]* initialized" + log_must zpool initialize -u $TESTPOOL + status_check_all $TESTPOOL "uninitialized" -log_must zpool initialize -s $TESTPOOL -status_check_all $TESTPOOL "suspended" + # 4. Verify per-disk cancel|suspend + uninit + log_must zpool initialize $TESTPOOL + status_check_all $TESTPOOL "[[:digit:]]* initialized" -log_must zpool initialize -u $TESTPOOL -status_check_all $TESTPOOL "uninitialized" + log_must zpool initialize -c $TESTPOOL $DISK1 + log_must zpool initialize -s $TESTPOOL $DISK2 + log_mustnot zpool initialize -u $TESTPOOL $DISK3 + status_check $TESTPOOL "uninitialized" "suspended" "[[:digit:]]* initialized" -# 4. 
Verify per-disk cancel|suspend + uninit -log_must zpool initialize $TESTPOOL -status_check_all $TESTPOOL "[[:digit:]]* initialized" + log_must zpool initialize -u $TESTPOOL $DISK1 + status_check $TESTPOOL "uninitialized" "suspended" "[[:digit:]]* initialized" -log_must zpool initialize -c $TESTPOOL $DISK1 -log_must zpool initialize -s $TESTPOOL $DISK2 -log_mustnot zpool initialize -u $TESTPOOL $DISK3 -status_check $TESTPOOL "uninitialized" "suspended" "[[:digit:]]* initialized" + log_must zpool initialize -u $TESTPOOL $DISK2 + status_check $TESTPOOL "uninitialized" "uninitialized" "[[:digit:]]* initialized" -log_must zpool initialize -u $TESTPOOL $DISK1 -status_check $TESTPOOL "uninitialized" "suspended" "[[:digit:]]* initialized" + log_must zpool initialize $TESTPOOL $DISK1 + status_check $TESTPOOL "[[:digit:]]* initialized" "uninitialized" "[[:digit:]]* initialized" -log_must zpool initialize -u $TESTPOOL $DISK2 -status_check $TESTPOOL "uninitialized" "uninitialized" "[[:digit:]]* initialized" + log_must zpool initialize $TESTPOOL $DISK2 + status_check_all $TESTPOOL "[[:digit:]]* initialized" -log_must zpool initialize $TESTPOOL $DISK1 -status_check $TESTPOOL "[[:digit:]]* initialized" "uninitialized" "[[:digit:]]* initialized" + log_must zpool initialize -s $TESTPOOL + status_check_all $TESTPOOL "suspended" -log_must zpool initialize $TESTPOOL $DISK2 -status_check_all $TESTPOOL "[[:digit:]]* initialized" + log_must zpool initialize -u $TESTPOOL $DISK1 $DISK2 $DISK3 + status_check_all $TESTPOOL "uninitialized" -log_must zpool initialize -s $TESTPOOL -status_check_all $TESTPOOL "suspended" + poolexists $TESTPOOL && destroy_pool $TESTPOOL -log_must zpool initialize -u $TESTPOOL $DISK1 $DISK2 $DISK3 -status_check_all $TESTPOOL "uninitialized" +done log_pass "Initialize start + cancel/suspend + uninit + start works" diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_verify_checksums.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_verify_checksums.ksh index a8d06d464851..b5c2cda1ee1d 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_verify_checksums.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_verify_checksums.ksh @@ -37,24 +37,31 @@ # 3. Start initializing and verify that initializing is active. # 4. Write more data to the pool. # 5. Run zdb to validate checksums. +# 6. Repeat for other VDEVs. 
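+#
+# For reference, the checksum validation in steps 2 and 5 uses zdb;
+# "-cc" asks zdb to traverse the pool and verify the checksums of all
+# blocks, so an initializing write that landed on allocated space
+# would surface as a checksum error:
+#
+#   zdb -cc $TESTPOOL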
# DISK1=${DISKS%% *} -log_must zpool create -f $TESTPOOL $DISK1 -log_must dd if=/dev/urandom of=/$TESTPOOL/file1 bs=1M count=30 -sync_all_pools +for type in "" "anyraid0"; do -log_must zpool initialize $TESTPOOL + log_must zpool create -f $TESTPOOL $type $DISK1 + log_must dd if=/dev/urandom of=/$TESTPOOL/file1 bs=1M count=30 + sync_all_pools -log_must zdb -cc $TESTPOOL + log_must zpool initialize $TESTPOOL -[[ -z "$(initialize_progress $TESTPOOL $DISK1)" ]] && \ - log_fail "Initializing did not start" + log_must zdb -cc $TESTPOOL -log_must dd if=/dev/urandom of=/$TESTPOOL/file2 bs=1M count=30 -sync_all_pools + [[ -z "$(initialize_progress $TESTPOOL $DISK1)" ]] && \ + log_fail "Initializing did not start" -log_must zdb -cc $TESTPOOL + log_must dd if=/dev/urandom of=/$TESTPOOL/file2 bs=1M count=30 + sync_all_pools + + log_must zdb -cc $TESTPOOL + + poolexists $TESTPOOL && destroy_pool $TESTPOOL + +done log_pass "Initializing does not corrupt existing or new data" diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_verify_initialized.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_verify_initialized.ksh index 92e6164d637d..5cdf6d94e834 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_verify_initialized.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_verify_initialized.ksh @@ -59,30 +59,37 @@ log_must set_tunable64 INITIALIZE_VALUE $(printf %llu 0x$PATTERN) log_must mkdir "$TESTDIR" log_must truncate -s $MINVDEVSIZE "$SMALLFILE" -log_must zpool create $TESTPOOL "$SMALLFILE" -log_must zpool initialize -w $TESTPOOL -log_must zpool export $TESTPOOL -metaslabs=0 -bs=512 -zdb -p $TESTDIR -Pme $TESTPOOL | awk '/metaslab[ ]+[0-9]+/ { print $4, $8 }' | -while read -r offset size; do - log_note "offset: '$offset'" - log_note "size: '$size'" +for type in "" "anyraid0"; do - metaslabs=$((metaslabs + 1)) - offset=$(((4 * 1024 * 1024) + 16#$offset)) - log_note "vdev file offset: '$offset'" + log_must zpool create $TESTPOOL $type "$SMALLFILE" + log_must zpool initialize -w $TESTPOOL + log_must zpool export $TESTPOOL - # Note we use '-t x4' instead of '-t x8' here because x8 is not - # a supported format on FreeBSD. - dd if=$SMALLFILE skip=$((offset / bs)) count=$((size / bs)) bs=$bs | - od -t x4 -Ad | grep -qE "deadbeef +deadbeef +deadbeef +deadbeef" || - log_fail "Pattern not found in metaslab free space" -done + metaslabs=0 + bs=512 + zdb -p $TESTDIR -Pme $TESTPOOL | awk '/metaslab[ ]+[0-9]+/ { print $4, $8 }' | + while read -r offset size; do + log_note "offset: '$offset'" + log_note "size: '$size'" + + metaslabs=$((metaslabs + 1)) + offset=$(((4 * 1024 * 1024) + 16#$offset)) + log_note "vdev file offset: '$offset'" + + # Note we use '-t x4' instead of '-t x8' here because x8 is not + # a supported format on FreeBSD. 
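+		#
+		# The 4 MiB added to the metaslab offset above skips the
+		# two front vdev labels and the boot block region that
+		# precede the allocatable space in a leaf vdev file.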
+ dd if=$SMALLFILE skip=$((offset / bs)) count=$((size / bs)) bs=$bs | + od -t x4 -Ad | grep -qE "deadbeef +deadbeef +deadbeef +deadbeef" || + log_fail "Pattern not found in metaslab free space" + done -if [[ $metaslabs -eq 0 ]]; then - log_fail "Did not find any metaslabs to check" -else - log_pass "Initializing wrote to each metaslab" -fi + if [[ $metaslabs -eq 0 ]]; then + log_fail "Did not find any metaslabs to check" + else + log_pass "Initializing wrote to each metaslab" + fi + + poolexists $TESTPOOL && destroy_pool $TESTPOOL + +done diff --git a/tests/zfs-tests/tests/functional/fault/auto_spare_001_pos.ksh b/tests/zfs-tests/tests/functional/fault/auto_spare_001_pos.ksh index 6397e26b5d89..2f28f4874a99 100755 --- a/tests/zfs-tests/tests/functional/fault/auto_spare_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/fault/auto_spare_001_pos.ksh @@ -56,7 +56,14 @@ zed_events_drain TESTFILE="/$TESTPOOL/$TESTFS/testfile" -for type in "mirror" "raidz" "raidz2" "draid:1s"; do +for type in "mirror" "raidz" "raidz2" "draid:1s" "anyraid1" "anyraid2" "anyraid3"; do + if [[ "$type" =~ "anyraid" ]]; then + export VDEVSIZE=1073741824 + export TESTFILE_SIZE=268435456 + else + export VDEVSIZE=$MINVDEVSIZE + export TESTFILE_SIZE=67108864 + fi if [ "$type" = "draid:1s" ]; then # 1. Create a dRAID pool with a distributed hot spare # @@ -64,13 +71,13 @@ for type in "mirror" "raidz" "raidz2" "draid:1s"; do # vdev since the dRAID permutation at these offsets maps # to distributed spare space and not data devices. # - log_must truncate -s $MINVDEVSIZE $VDEV_FILES + log_must truncate -s $VDEVSIZE $VDEV_FILES log_must zpool create -f $TESTPOOL $type $VDEV_FILES SPARE="draid1-0-0" FAULT="$TEST_BASE_DIR/file-2" else # 1. Create a pool with hot spares - log_must truncate -s $MINVDEVSIZE $VDEV_FILES $SPARE_FILE + log_must truncate -s $VDEVSIZE $VDEV_FILES $SPARE_FILE log_must zpool create -f $TESTPOOL $type $VDEV_FILES \ spare $SPARE_FILE SPARE=$SPARE_FILE @@ -79,14 +86,14 @@ for type in "mirror" "raidz" "raidz2" "draid:1s"; do # 2. Create a filesystem with the primary cache disable to force reads log_must zfs create -o primarycache=none $TESTPOOL/$TESTFS - log_must zfs set recordsize=16k $TESTPOOL/$TESTFS + log_must zfs set recordsize=16k compression=off $TESTPOOL/$TESTFS # 3. Write a file to the pool to be read back - log_must dd if=/dev/urandom of=$TESTFILE bs=1M count=64 + log_must dd if=/dev/urandom of=$TESTFILE bs=1M count=$(( TESTFILE_SIZE / 1024 / 1024 )) # 4. Inject IO ERRORS on read with a zinject error handler log_must zinject -d $FAULT -e io -T read $TESTPOOL - log_must cp $TESTFILE /dev/null + log_must dd if=$TESTFILE of=/dev/null bs=1M count=$(( TESTFILE_SIZE / 1024 / 1024 )) # 5. Verify the ZED kicks in a hot spare and expected pool/device status log_note "Wait for ZED to auto-spare" diff --git a/tests/zfs-tests/tests/functional/fault/auto_spare_002_pos.ksh b/tests/zfs-tests/tests/functional/fault/auto_spare_002_pos.ksh index 1d104fe6c106..a1746757c299 100755 --- a/tests/zfs-tests/tests/functional/fault/auto_spare_002_pos.ksh +++ b/tests/zfs-tests/tests/functional/fault/auto_spare_002_pos.ksh @@ -59,22 +59,30 @@ fi TESTFILE="/$TESTPOOL/$TESTFS/testfile" -for type in "mirror" "raidz" "raidz2"; do +for type in "mirror" "raidz" "raidz2" "anyraid1" "anyraid2" "anyraid3"; do + if [[ "$type" =~ "anyraid" ]]; then + export VDEVSIZE=1073741824 + export TESTFILE_SIZE=268435456 + else + export VDEVSIZE=$MINVDEVSIZE + export TESTFILE_SIZE=67108864 + fi # 1. 
Create a pool with hot spares - log_must truncate -s $MINVDEVSIZE $VDEV_FILES $SPARE_FILE + log_must truncate -s $VDEVSIZE $VDEV_FILES $SPARE_FILE log_must zpool create -f $TESTPOOL $type $VDEV_FILES \ spare $SPARE_FILE # 2. Create a filesystem with the primary cache disable to force reads log_must zfs create -o primarycache=none $TESTPOOL/$TESTFS - log_must zfs set recordsize=16k $TESTPOOL/$TESTFS + log_must zfs set recordsize=16k compression=off $TESTPOOL/$TESTFS # 3. Write a file to the pool to be read back - log_must dd if=/dev/urandom of=$TESTFILE bs=1M count=64 + log_must dd if=/dev/urandom of=$TESTFILE bs=1M count=$(( TESTFILE_SIZE / 1024 / 1024 )) # 4. Inject CHECKSUM ERRORS on read with a zinject error handler - log_must zinject -d $FAULT_FILE -e corrupt -f 50 -T read $TESTPOOL - log_must dd if=$TESTFILE of=/dev/null bs=1M count=64 + log_must zinject -d $FAULT_FILE -e corrupt -f 100 -T read $TESTPOOL + log_must dd if=$TESTFILE of=/dev/null bs=1M count=$(( TESTFILE_SIZE / 1024 / 1024 )) + log_must zinject # 5. Verify the ZED kicks in a hot spare and expected pool/device status log_note "Wait for ZED to auto-spare" diff --git a/tests/zfs-tests/tests/functional/fault/cleanup.ksh b/tests/zfs-tests/tests/functional/fault/cleanup.ksh index 8801991263cc..bab3de0fdbfb 100755 --- a/tests/zfs-tests/tests/functional/fault/cleanup.ksh +++ b/tests/zfs-tests/tests/functional/fault/cleanup.ksh @@ -35,4 +35,6 @@ zed_stop zed_cleanup resilver_finish-start-scrub.sh zed_events_drain +restore_tunable ANYRAID_MIN_TILE_SIZE + log_pass diff --git a/tests/zfs-tests/tests/functional/fault/fault.cfg b/tests/zfs-tests/tests/functional/fault/fault.cfg index 30887f290ed4..7773709ba23b 100644 --- a/tests/zfs-tests/tests/functional/fault/fault.cfg +++ b/tests/zfs-tests/tests/functional/fault/fault.cfg @@ -50,6 +50,6 @@ if is_linux; then fi export VDEV_FILES="$TEST_BASE_DIR/file-1 $TEST_BASE_DIR/file-2 \ - $TEST_BASE_DIR/file-3 $TEST_BASE_DIR/file-4" + $TEST_BASE_DIR/file-3 $TEST_BASE_DIR/file-4 $TEST_BASE_DIR/file-5" export SPARE_FILE="$TEST_BASE_DIR/spare-1" export FAULT_FILE="$TEST_BASE_DIR/file-1" diff --git a/tests/zfs-tests/tests/functional/fault/setup.ksh b/tests/zfs-tests/tests/functional/fault/setup.ksh index 6ca860ed6153..0357e35785b6 100755 --- a/tests/zfs-tests/tests/functional/fault/setup.ksh +++ b/tests/zfs-tests/tests/functional/fault/setup.ksh @@ -29,6 +29,9 @@ verify_runnable "global" +log_must save_tunable ANYRAID_MIN_TILE_SIZE +log_must set_tunable64 ANYRAID_MIN_TILE_SIZE 67108864 + zed_events_drain zed_setup resilver_finish-start-scrub.sh zed_start diff --git a/tests/zfs-tests/tests/functional/trim/autotrim_config.ksh b/tests/zfs-tests/tests/functional/trim/autotrim_config.ksh index a8deedfb8c3c..a4c7a2ac49f6 100755 --- a/tests/zfs-tests/tests/functional/trim/autotrim_config.ksh +++ b/tests/zfs-tests/tests/functional/trim/autotrim_config.ksh @@ -26,7 +26,7 @@ # # DESCRIPTION: -# Check various pool geometries stripe, mirror, raidz) +# Check various pool geometries stripe, mirror, anyraid, raidz. # # STRATEGY: # 1. Create a pool on file vdevs to trim. @@ -36,7 +36,7 @@ # 5. Remove all files making it possible to trim the entire pool. # 6. Wait for auto trim to issue trim IOs for the free blocks. # 7. Verify the disks contain 30% or less allocated blocks. -# 8. Repeat for test for striped, mirrored, and RAIDZ pools. +# 8. Repeat for test for striped, mirrored, AnyRAID, and RAIDZ pools. 
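+#
+# A sketch of the step 7 check (illustrative, not the test's literal
+# code): once auto-trim has caught up, the backing files should have
+# been hole-punched back down toward their pre-fill size, e.g.
+#
+#   typeset size_mb=$(du -m $TRIM_VDEV1 | awk '{print $1}')
+#   [[ $size_mb -le $VDEV_MIN_MB ]] || log_fail "vdev was not trimmed"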
verify_runnable "global" @@ -71,12 +71,14 @@ log_must set_tunable64 VDEV_MIN_MS_COUNT 32 typeset VDEV_MAX_MB=$(( floor(4 * MINVDEVSIZE * 0.75 / 1024 / 1024) )) typeset VDEV_MIN_MB=$(( floor(4 * MINVDEVSIZE * 0.30 / 1024 / 1024) )) -for type in "" "mirror" "raidz2" "draid"; do +for type in "" "mirror" "anyraid0" "anyraid1" "anyraid2" "anyraid3" "raidz2" "draid"; do if [[ "$type" = "" ]]; then VDEVS="$TRIM_VDEV1" elif [[ "$type" = "mirror" ]]; then VDEVS="$TRIM_VDEV1 $TRIM_VDEV2" + elif [[ "$type" =~ "anyraid" ]]; then + VDEVS="$TRIM_VDEV1 $TRIM_VDEV2 $TRIM_VDEV3 $TRIM_VDEV4" elif [[ "$type" = "raidz2" ]]; then VDEVS="$TRIM_VDEV1 $TRIM_VDEV2 $TRIM_VDEV3" elif [[ "$type" = "draid" ]]; then diff --git a/tests/zfs-tests/tests/functional/trim/autotrim_integrity.ksh b/tests/zfs-tests/tests/functional/trim/autotrim_integrity.ksh index 1995dbe6fa5c..99c5efd3a5a1 100755 --- a/tests/zfs-tests/tests/functional/trim/autotrim_integrity.ksh +++ b/tests/zfs-tests/tests/functional/trim/autotrim_integrity.ksh @@ -34,7 +34,7 @@ # 3. Generate some interesting pool data which can be trimmed. # 4. Verify trim IOs of the expected type were issued for the pool. # 5. Verify data integrity of the pool after trim. -# 6. Repeat test for striped, mirrored, and RAIDZ pools. +# 6. Repeat test for striped, mirrored, AnyRAID, and RAIDZ pools. verify_runnable "global" @@ -61,7 +61,7 @@ log_must set_tunable64 TRIM_EXTENT_BYTES_MIN 512 typeset trim_txg_batch=$(get_tunable TRIM_TXG_BATCH) log_must set_tunable64 TRIM_TXG_BATCH 8 -for type in "" "mirror" "raidz" "draid"; do +for type in "" "mirror" "anyraid0" "anyraid1" "anyraid2" "anyraid3" "raidz" "draid"; do log_must truncate -s 1G $TRIM_VDEVS log_must zpool create -f $TESTPOOL $type $TRIM_VDEVS diff --git a/tests/zfs-tests/tests/functional/trim/autotrim_trim_integrity.ksh b/tests/zfs-tests/tests/functional/trim/autotrim_trim_integrity.ksh index 440f2bd1302a..b6ff889c0849 100755 --- a/tests/zfs-tests/tests/functional/trim/autotrim_trim_integrity.ksh +++ b/tests/zfs-tests/tests/functional/trim/autotrim_trim_integrity.ksh @@ -35,7 +35,7 @@ # 4. While generating data issue manual trims. # 4. Verify trim IOs of the expected type were issued for the pool. # 5. Verify data integrity of the pool after trim. -# 6. Repeat test for striped, mirrored, and RAIDZ pools. +# 6. Repeat test for striped, mirrored, AnyRAID, and RAIDZ pools. 
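+#
+# In outline, the combination exercised here is (a sketch, not the
+# test's literal code):
+#
+#   zpool set autotrim=on $TESTPOOL  # trim freed space in the background
+#   zpool trim $TESTPOOL             # plus manual trims while writing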
verify_runnable "global" @@ -62,7 +62,7 @@ log_must set_tunable64 TRIM_EXTENT_BYTES_MIN 512 typeset trim_txg_batch=$(get_tunable TRIM_TXG_BATCH) log_must set_tunable64 TRIM_TXG_BATCH 8 -for type in "" "mirror" "raidz" "raidz2" "draid" "draid2"; do +for type in "" "mirror" "anyraid0" "anyraid1" "anyraid2" "anyraid3" "raidz" "raidz2" "draid" "draid2"; do log_must truncate -s 1G $TRIM_VDEVS log_must zpool create -f $TESTPOOL $type $TRIM_VDEVS diff --git a/tests/zfs-tests/tests/functional/trim/cleanup.ksh b/tests/zfs-tests/tests/functional/trim/cleanup.ksh index faeefb8e5acd..ada38bd1d4fa 100755 --- a/tests/zfs-tests/tests/functional/trim/cleanup.ksh +++ b/tests/zfs-tests/tests/functional/trim/cleanup.ksh @@ -46,4 +46,6 @@ TRIM_VDEVS="$TRIM_DIR/trim-vdev1 $TRIM_DIR/trim-vdev2 \ rm -rf $TRIM_VDEVS +restore_tunable ANYRAID_MIN_TILE_SIZE + default_cleanup diff --git a/tests/zfs-tests/tests/functional/trim/setup.ksh b/tests/zfs-tests/tests/functional/trim/setup.ksh index 7be2a316a873..de44ff82f5d7 100755 --- a/tests/zfs-tests/tests/functional/trim/setup.ksh +++ b/tests/zfs-tests/tests/functional/trim/setup.ksh @@ -41,4 +41,7 @@ else fi fi +log_must save_tunable ANYRAID_MIN_TILE_SIZE +log_must set_tunable64 ANYRAID_MIN_TILE_SIZE 67108864 + log_pass diff --git a/tests/zfs-tests/tests/functional/trim/trim_config.ksh b/tests/zfs-tests/tests/functional/trim/trim_config.ksh index ff569177357b..266df6f41efe 100755 --- a/tests/zfs-tests/tests/functional/trim/trim_config.ksh +++ b/tests/zfs-tests/tests/functional/trim/trim_config.ksh @@ -36,7 +36,7 @@ # 5. Manually trim the pool. # 6. Wait for trim to issue trim IOs for the free blocks. # 7. Verify the disks contain 30% or less allocated blocks. -# 8. Repeat for test for striped, mirrored, and RAIDZ pools. +# 8. Repeat for test for striped, mirrored, AnyRAID, and RAIDZ pools. verify_runnable "global" @@ -68,23 +68,41 @@ log_must set_tunable64 TRIM_TXG_BATCH 8 typeset vdev_min_ms_count=$(get_tunable VDEV_MIN_MS_COUNT) log_must set_tunable64 VDEV_MIN_MS_COUNT 32 -typeset VDEV_MAX_MB=$(( floor(4 * MINVDEVSIZE * 0.75 / 1024 / 1024) )) -typeset VDEV_MIN_MB=$(( floor(4 * MINVDEVSIZE * 0.30 / 1024 / 1024) )) +typeset VDEV_MAX_MB=$(( 4 * MINVDEVSIZE / 1024 / 1024 )) +typeset VDEV_MIN_MB=0 -for type in "" "mirror" "raidz2" "draid"; do +for type in "" "mirror" "anyraid0" "anyraid1" "anyraid2" "anyraid3" "raidz2" "draid"; do if [[ "$type" = "" ]]; then VDEVS="$TRIM_VDEV1" elif [[ "$type" = "mirror" ]]; then VDEVS="$TRIM_VDEV1 $TRIM_VDEV2" + elif [[ "$type" = "anyraid0" ]]; then + VDEVS="$TRIM_VDEV1" + elif [[ "$type" = "anyraid1" ]]; then + VDEVS="$TRIM_VDEV1 $TRIM_VDEV2" + elif [[ "$type" = "anyraid2" ]]; then + VDEVS="$TRIM_VDEV1 $TRIM_VDEV2 $TRIM_VDEV3" + elif [[ "$type" = "anyraid3" ]]; then + VDEVS="$TRIM_VDEV1 $TRIM_VDEV2 $TRIM_VDEV3 $TRIM_VDEV4" elif [[ "$type" = "raidz2" ]]; then VDEVS="$TRIM_VDEV1 $TRIM_VDEV2 $TRIM_VDEV3" elif [[ "$type" = "draid" ]]; then VDEVS="$TRIM_VDEV1 $TRIM_VDEV2 $TRIM_VDEV3 $TRIM_VDEV4" + fi + if [[ "$type" =~ "anyraid" ]]; then + # The AnyRAID VDEV takes some space for the mapping itself + VDEV_MAX_MB=$(( floor(3 * MINVDEVSIZE * 0.75 / 1024 / 1024) )) + VDEV_MIN_MB=$(( floor(3 * MINVDEVSIZE * 0.30 / 1024 / 1024) )) + elif [[ "$type" = "draid" ]]; then # The per-vdev utilization is lower due to the capacity # resilverd for the distributed spare. 
VDEV_MAX_MB=$(( floor(4 * MINVDEVSIZE * 0.50 / 1024 / 1024) )) + VDEV_MIN_MB=$(( floor(4 * MINVDEVSIZE * 0.30 / 1024 / 1024) )) + else + VDEV_MAX_MB=$(( floor(4 * MINVDEVSIZE * 0.75 / 1024 / 1024) )) + VDEV_MIN_MB=$(( floor(4 * MINVDEVSIZE * 0.30 / 1024 / 1024) )) fi log_must truncate -s $((4 * MINVDEVSIZE)) $VDEVS diff --git a/tests/zfs-tests/tests/functional/trim/trim_integrity.ksh b/tests/zfs-tests/tests/functional/trim/trim_integrity.ksh index f298f66a44d8..52ebbc797e38 100755 --- a/tests/zfs-tests/tests/functional/trim/trim_integrity.ksh +++ b/tests/zfs-tests/tests/functional/trim/trim_integrity.ksh @@ -34,7 +34,7 @@ # 3. Manually trim the pool. # 4. Verify trim IOs of the expected type were issued for the pool. # 5. Verify data integrity of the pool after trim. -# 6. Repeat test for striped, mirrored, and RAIDZ pools. +# 6. Repeat test for striped, mirrored, AnyRAID, and RAIDZ pools. verify_runnable "global" @@ -61,7 +61,7 @@ log_must set_tunable64 TRIM_EXTENT_BYTES_MIN 512 typeset trim_txg_batch=$(get_tunable TRIM_TXG_BATCH) log_must set_tunable64 TRIM_TXG_BATCH 8 -for type in "" "mirror" "raidz" "draid"; do +for type in "" "mirror" "anyraid0" "anyraid1" "anyraid2" "anyraid3" "raidz" "draid"; do log_must truncate -s 1G $TRIM_VDEVS log_must zpool create -f $TESTPOOL $type $TRIM_VDEVS From 09fc9b804c6bd0b8f427013d2dcbbdd6b1b460b9 Mon Sep 17 00:00:00 2001 From: Paul Dagnelie Date: Fri, 30 May 2025 17:13:19 -0700 Subject: [PATCH 07/21] Implement rebuild support Signed-off-by: Paul Dagnelie Sponsored-by: Eshtek, creators of HexOS Sponsored-by: Klara, Inc. --- module/zfs/spa.c | 1 + module/zfs/vdev_anyraid.c | 27 ++++++++++++++++++++++++++- module/zfs/vdev_rebuild.c | 1 + 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 623a064187e6..f03fb68dc574 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -7887,6 +7887,7 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing, if (tvd->vdev_ops != &vdev_mirror_ops && tvd->vdev_ops != &vdev_root_ops && + tvd->vdev_ops != &vdev_anyraid_ops && tvd->vdev_ops != &vdev_draid_ops) { return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); } diff --git a/module/zfs/vdev_anyraid.c b/module/zfs/vdev_anyraid.c index 6c0a908d17c1..51b38fa89ece 100644 --- a/module/zfs/vdev_anyraid.c +++ b/module/zfs/vdev_anyraid.c @@ -1485,6 +1485,31 @@ vdev_anyraid_mapped(vdev_t *vd, uint64_t offset) return (result); } +/* + * Return the maximum asize for a rebuild zio in the provided range + * given the following constraints. An anyraid chunk may not: + * + * - Exceed the maximum allowed block size (SPA_MAXBLOCKSIZE), or + * - Span anyraid tiles + */ +static uint64_t +vdev_anyraid_rebuild_asize(vdev_t *vd, uint64_t start, uint64_t asize, + uint64_t max_segment) +{ + vdev_anyraid_t *var = vd->vdev_tsd; + ASSERT3P(vd->vdev_ops, ==, &vdev_anyraid_ops); + + uint64_t psize = MIN(P2ROUNDUP(max_segment, 1 << vd->vdev_ashift), + SPA_MAXBLOCKSIZE); + + if (start / var->vd_tile_size != + (start + psize) / var->vd_tile_size) { + psize = P2ROUNDUP(start, var->vd_tile_size) - start; + } + + return (MIN(asize, vdev_psize_to_asize(vd, psize))); +} + vdev_ops_t vdev_anyraid_ops = { .vdev_op_init = vdev_anyraid_init, .vdev_op_fini = vdev_anyraid_fini, @@ -1502,7 +1527,7 @@ vdev_ops_t vdev_anyraid_ops = { .vdev_op_rele = NULL, .vdev_op_remap = NULL, .vdev_op_xlate = vdev_anyraid_xlate, - .vdev_op_rebuild_asize = NULL, // TODO do we want to support rebuilds? 
+ .vdev_op_rebuild_asize = vdev_anyraid_rebuild_asize, .vdev_op_metaslab_init = NULL, .vdev_op_config_generate = vdev_anyraid_config_generate, .vdev_op_nparity = vdev_anyraid_nparity, diff --git a/module/zfs/vdev_rebuild.c b/module/zfs/vdev_rebuild.c index 30be1f851eb3..931115b5494e 100644 --- a/module/zfs/vdev_rebuild.c +++ b/module/zfs/vdev_rebuild.c @@ -525,6 +525,7 @@ vdev_rebuild_blkptr_init(blkptr_t *bp, vdev_t *vd, uint64_t start, { ASSERT(vd->vdev_ops == &vdev_draid_ops || vd->vdev_ops == &vdev_mirror_ops || + vd->vdev_ops == &vdev_anyraid_ops || vd->vdev_ops == &vdev_replacing_ops || vd->vdev_ops == &vdev_spare_ops); From 9e3ec47769682c65c8ceabeb58309b93afe18ccf Mon Sep 17 00:00:00 2001 From: Paul Dagnelie Date: Mon, 2 Jun 2025 14:22:52 -0700 Subject: [PATCH 08/21] Add support for anyraid in vdev properties Signed-off-by: Paul Dagnelie Sponsored-by: Eshtek, creators of HexOS Sponsored-by: Klara, Inc. --- include/sys/fs/zfs.h | 3 ++ lib/libzfs/libzfs_pool.c | 5 +++ module/zcommon/zpool_prop.c | 9 ++++++ module/zfs/vdev.c | 64 ++++++++++++++++++++++++++++++++++++- 4 files changed, 80 insertions(+), 1 deletion(-) diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index 1d1cd5ec194e..b41fb4aa3259 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -388,6 +388,9 @@ typedef enum { VDEV_PROP_SIT_OUT, VDEV_PROP_AUTOSIT, VDEV_PROP_SLOW_IO_EVENTS, + VDEV_PROP_ANYRAID_CAP_TILES, + VDEV_PROP_ANYRAID_NUM_TILES, + VDEV_PROP_ANYRAID_TILE_SIZE, VDEV_NUM_PROPS } vdev_prop_t; diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index 4f332329f48d..7074e0d442d2 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -5481,6 +5481,10 @@ zpool_get_vdev_prop_value(nvlist_t *nvprop, vdev_prop_t prop, char *prop_name, if (nvlist_lookup_nvlist(nvprop, prop_name, &nv) == 0) { src = fnvlist_lookup_uint64(nv, ZPROP_SOURCE); intval = fnvlist_lookup_uint64(nv, ZPROP_VALUE); + } else if (prop == VDEV_PROP_ANYRAID_CAP_TILES || + prop == VDEV_PROP_ANYRAID_NUM_TILES || + prop == VDEV_PROP_ANYRAID_TILE_SIZE) { + return (ENOENT); } else { src = ZPROP_SRC_DEFAULT; intval = vdev_prop_default_numeric(prop); @@ -5511,6 +5515,7 @@ zpool_get_vdev_prop_value(nvlist_t *nvprop, vdev_prop_t prop, char *prop_name, case VDEV_PROP_BYTES_FREE: case VDEV_PROP_BYTES_CLAIM: case VDEV_PROP_BYTES_TRIM: + case VDEV_PROP_ANYRAID_TILE_SIZE: if (literal) { (void) snprintf(buf, len, "%llu", (u_longlong_t)intval); diff --git a/module/zcommon/zpool_prop.c b/module/zcommon/zpool_prop.c index 4826237b23e8..f841419a964b 100644 --- a/module/zcommon/zpool_prop.c +++ b/module/zcommon/zpool_prop.c @@ -436,6 +436,15 @@ vdev_prop_init(void) zprop_register_number(VDEV_PROP_BYTES_TRIM, "trim_bytes", 0, PROP_READONLY, ZFS_TYPE_VDEV, "", "TRIMBYTE", B_FALSE, sfeatures); + zprop_register_number(VDEV_PROP_ANYRAID_CAP_TILES, + "anyraid_region_capacity", 0, PROP_READONLY, ZFS_TYPE_VDEV, + "", "TILECAP", B_FALSE, sfeatures); + zprop_register_number(VDEV_PROP_ANYRAID_NUM_TILES, + "anyraid_region_count", 0, PROP_READONLY, ZFS_TYPE_VDEV, + "", "NUMTILES", B_FALSE, sfeatures); + zprop_register_number(VDEV_PROP_ANYRAID_TILE_SIZE, + "anyraid_region_size", 0, PROP_READONLY, ZFS_TYPE_VDEV, + "", "TILESIZE", B_FALSE, sfeatures); /* default numeric properties */ zprop_register_number(VDEV_PROP_CHECKSUM_N, "checksum_n", UINT64_MAX, diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index fb13dea939f2..a48db085878b 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -55,11 +55,11 @@ #include #include #include -#include 
#include #include #include #include +#include #include #include #include "zfs_prop.h" @@ -6723,6 +6723,68 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl) break; } break; + case VDEV_PROP_ANYRAID_CAP_TILES: + { + vdev_t *pvd = vd->vdev_parent; + uint64_t total = 0; + if (vd->vdev_ops == &vdev_anyraid_ops) { + vdev_anyraid_t *var = vd->vdev_tsd; + for (int i = 0; i < vd->vdev_children; + i++) { + total += var->vd_children[i] + ->van_capacity; + } + } else if (pvd && pvd->vdev_ops == + &vdev_anyraid_ops) { + vdev_anyraid_t *var = pvd->vdev_tsd; + total = var->vd_children[vd->vdev_id] + ->van_capacity; + } else { + continue; + } + vdev_prop_add_list(outnvl, propname, + NULL, total, ZPROP_SRC_NONE); + continue; + } + case VDEV_PROP_ANYRAID_NUM_TILES: + { + vdev_t *pvd = vd->vdev_parent; + uint64_t total = 0; + if (vd->vdev_ops == &vdev_anyraid_ops) { + vdev_anyraid_t *var = vd->vdev_tsd; + for (int i = 0; i < vd->vdev_children; + i++) { + total += var->vd_children[i] + ->van_next_offset; + } + } else if (pvd && pvd->vdev_ops == + &vdev_anyraid_ops) { + vdev_anyraid_t *var = pvd->vdev_tsd; + total = var->vd_children[vd->vdev_id] + ->van_next_offset; + } else { + continue; + } + vdev_prop_add_list(outnvl, propname, + NULL, total, ZPROP_SRC_NONE); + continue; + } + case VDEV_PROP_ANYRAID_TILE_SIZE: + { + vdev_t *pvd = vd->vdev_parent; + vdev_anyraid_t *var = NULL; + if (vd->vdev_ops == &vdev_anyraid_ops) { + var = vd->vdev_tsd; + } else if (pvd && pvd->vdev_ops == + &vdev_anyraid_ops) { + var = pvd->vdev_tsd; + } else { + continue; + } + vdev_prop_add_list(outnvl, propname, + NULL, var->vd_tile_size, ZPROP_SRC_NONE); + continue; + } default: err = ENOENT; break; From f9a340dd9cb78e717f8acc73abd02234c676fc55 Mon Sep 17 00:00:00 2001 From: Paul Dagnelie Date: Mon, 9 Jun 2025 15:41:40 -0700 Subject: [PATCH 09/21] Add man page entry Signed-off-by: Paul Dagnelie Sponsored-by: Eshtek, creators of HexOS Sponsored-by: Klara, Inc. --- man/man7/zpoolconcepts.7 | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/man/man7/zpoolconcepts.7 b/man/man7/zpoolconcepts.7 index 21bd72351209..08016194db8a 100644 --- a/man/man7/zpoolconcepts.7 +++ b/man/man7/zpoolconcepts.7 @@ -165,6 +165,26 @@ An error is returned when the provided number of children differs. The number of distributed hot spares. Defaults to zero. .El +.It Sy anyraid , anyraid0 , anyraid1 , anyraid2 +A new device type that allows for mirror-parity redundancy while using devices +of different sizes. +An AnyRAID vdev works by dividing each of the underlying disks that make it up +into +.Sy tiles \. +The tiles are then each mirrored at the desired parity level. +This allows for full redundancy, since tiles are allocated from independent +disks, while enabling maximum space usage by allocating more tiles from the +disks with the most free space. +In addition, the device can be expanded by attaching new disks, and new tiles +will be allocated from those disks. +.Sy anyraid +is a synonym for +.Sy anyraid1 +, which is the 2-way mirror parity version (1 parity tile). +.Sy anyraid2 +is a 3-way mirror (2 parity tiles), while +.Sy anyraid0 +is striped (no parity tiles), and is primarily intended for testing. .It Sy spare A pseudo-vdev which keeps track of available hot spares for a pool. 
For more information, see the From bc69e3d9d407d018df6105ea090d70f62d5ff219 Mon Sep 17 00:00:00 2001 From: Paul Dagnelie Date: Mon, 8 Sep 2025 12:21:26 -0700 Subject: [PATCH 10/21] improve byteswap logic Signed-off-by: Paul Dagnelie --- include/sys/vdev_anyraid.h | 24 ++++++++++++++++++++++++ module/zfs/vdev_anyraid.c | 18 +++++++++++++++--- 2 files changed, 39 insertions(+), 3 deletions(-) diff --git a/include/sys/vdev_anyraid.h b/include/sys/vdev_anyraid.h index 9276efe03fa9..6df19fed3f33 100644 --- a/include/sys/vdev_anyraid.h +++ b/include/sys/vdev_anyraid.h @@ -216,6 +216,30 @@ typedef struct anyraid_map_entry { } ame_u; } anyraid_map_entry_t; +static inline void +ame_byteswap(anyraid_map_entry_t *ame) +{ + uint8_t type = ame->ame_u.ame_amle.amle_type; + switch (type) { + case AMET_SKIP: { + anyraid_map_skip_entry_t *amse = + &ame->ame_u.ame_amse; + amse->amse_u.amse_skip_count = + BSWAP_32(amse_get_skip_count(amse)) >> NBBY; + amse->amse_u.amse_type = AMET_SKIP; + break; + } + case AMET_LOC: { + anyraid_map_loc_entry_t *amle = + &ame->ame_u.ame_amle; + amle->amle_offset = BSWAP_16(amle->amle_offset); + break; + } + default: + PANIC("Invalid entry type %d", type); + } +} + #define VDEV_ANYRAID_MAX_DISKS (1 << 8) #define VDEV_ANYRAID_MAX_TPD (1 << 16) #define VDEV_ANYRAID_MAX_TILES (VDEV_ANYRAID_MAX_DISKS * VDEV_ANYRAID_MAX_TPD) diff --git a/module/zfs/vdev_anyraid.c b/module/zfs/vdev_anyraid.c index 51b38fa89ece..aba02482570d 100644 --- a/module/zfs/vdev_anyraid.c +++ b/module/zfs/vdev_anyraid.c @@ -459,6 +459,16 @@ vdev_anyraid_pick_best_mapping(vdev_t *cvd, uint64_t *out_txg, return (error); } +#ifdef _ZFS_BIG_ENDIAN +static void +byteswap_map_buf(void *buf, uint32_t length) +{ + for (size_t i = 0; i < length; i += sizeof (anyraid_map_entry_t)) { + ame_byteswap((anyraid_map_entry_t *)((char *)buf + i)); + } +} +#endif + static int anyraid_open_existing(vdev_t *vd, uint64_t child, uint16_t **child_capacities) { @@ -596,8 +606,8 @@ anyraid_open_existing(vdev_t *vd, uint64_t child, uint16_t **child_capacities) #ifdef _ZFS_BIG_ENDIAN uint32_t length = map_length - next_map * SPA_MAXBLOCKSIZE; - byteswap_uint32_array(map_buf, length < - SPA_MAXBLOCKSIZE ? length : SPA_MAXBLOCKSIZE); + byteswap_map_buf(map_buf, (uint32_t)(length < + SPA_MAXBLOCKSIZE ? length : SPA_MAXBLOCKSIZE)); #endif } anyraid_map_entry_t *entry = @@ -1256,7 +1266,9 @@ map_write_issue(zio_t *zio, vdev_t *vd, uint64_t base_offset, int flags) { #ifdef _ZFS_BIG_ENDIAN - byteswap_uint32_array(abd, length); + void *buf = abd_borrow_buf(abd, SPA_MAXBLOCKSIZE); + byteswap_map_buf(buf, length); + abd_return_buf(abd, buf, SPA_MAXBLOCKSIZE); #else (void) length; #endif From 01f510ef31e4291a630608af0f9975291a514fdc Mon Sep 17 00:00:00 2001 From: Paul Dagnelie Date: Tue, 9 Sep 2025 09:30:39 -0700 Subject: [PATCH 11/21] Use zinject to try to make test fully reliable Signed-off-by: Paul Dagnelie --- .../zpool_initialize/zpool_initialize_online_offline.ksh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_online_offline.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_online_offline.ksh index 91b1d39ce4e4..15b8f32a8db3 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_online_offline.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_online_offline.ksh @@ -23,6 +23,7 @@ # # Copyright (c) 2016 by Delphix. All rights reserved. 
+# Copyright (c) 2025 by Klara, Inc. # . $STF_SUITE/include/libtest.shlib . $STF_SUITE/tests/functional/cli_root/zpool_initialize/zpool_initialize.kshlib @@ -46,6 +47,8 @@ DISK1=${DISKS%% *} DISK2="$(echo $DISKS | cut -d' ' -f2)" DISK3="$(echo $DISKS | cut -d' ' -f3)" +log_onexit_push zpool status -v + for type in "mirror" "anyraid1"; do if [[ "$type" == "mirror" ]]; then @@ -57,6 +60,7 @@ for type in "mirror" "anyraid1"; do log_must rm /$TESTPOOL/f1 log_must zpool sync fi + log_must zinject -D 10:1 -d $DISK1 -T write $TESTPOOL log_must zpool initialize $TESTPOOL $DISK1 log_must zpool offline $TESTPOOL $DISK1 @@ -74,6 +78,7 @@ for type in "mirror" "anyraid1"; do log_mustnot eval "initialize_prog_line $TESTPOOL $DISK1 | grep suspended" log_must zpool initialize -s $TESTPOOL $DISK1 + log_must zinject -c all action_date="$(initialize_prog_line $TESTPOOL $DISK1 | \ sed 's/.*ed at \(.*\)).*/\1/g')" log_must zpool offline $TESTPOOL $DISK1 From ab0cf6b9c69bb648e533040e9cf0e9b50be6119b Mon Sep 17 00:00:00 2001 From: Paul Dagnelie Date: Mon, 15 Sep 2025 11:22:40 -0700 Subject: [PATCH 12/21] Final byteswap handling Signed-off-by: Paul Dagnelie --- cmd/zdb/zdb.c | 6 +-- include/sys/vdev_anyraid.h | 83 +++++++++++++++++++++----------------- module/zfs/vdev_anyraid.c | 28 +++++-------- 3 files changed, 57 insertions(+), 60 deletions(-) diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index aec834ddca89..fd40a36edd8a 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -9699,7 +9699,7 @@ print_anyraid_mapping(vdev_t *vd, int child, int mapping, uint32_t mo = off % SPA_MAXBLOCKSIZE; anyraid_map_entry_t *entry = (anyraid_map_entry_t *)(map_buf + mo); - uint8_t type = entry->ame_u.ame_amle.amle_type; + uint8_t type = ame_get_type(entry); uint8_t *buf; boolean_t allocated = B_FALSE; if (size > SPA_MAXBLOCKSIZE - mo) { @@ -9737,8 +9737,8 @@ print_anyraid_mapping(vdev_t *vd, int child, int mapping, (void) printf("loc %u:", cur_tile); cur_tile++; } - (void) printf("\td%u o%u,", amle->amle_disk, - amle->amle_offset); + (void) printf("\td%u o%u,", amle_get_disk(amle), + amle_get_offset(amle)); par_cnt = (par_cnt + 1) % (var->vd_nparity + 1); if (par_cnt == 0) (void) printf("\n"); diff --git a/include/sys/vdev_anyraid.h b/include/sys/vdev_anyraid.h index 6df19fed3f33..c6afd1cdfe90 100644 --- a/include/sys/vdev_anyraid.h +++ b/include/sys/vdev_anyraid.h @@ -157,38 +157,33 @@ typedef enum anyraid_map_entry_type { AMET_TYPES } anyraid_map_entry_type_t; +#define AME_TYPE_BITS 8 + /* * ========================================================================== * Skip entry definitions and functions * ========================================================================== */ -typedef struct anyraid_map_skip_entry { - union { - uint8_t amse_type; - uint32_t amse_skip_count; // tile count to skip ahead - } amse_u; -} anyraid_map_skip_entry_t; +typedef uint32_t anyraid_map_skip_entry_t; #define AMSE_TILE_BITS 24 static inline void amse_set_type(anyraid_map_skip_entry_t *amse) { - amse->amse_u.amse_type = AMET_SKIP; - ASSERT3U(amse->amse_u.amse_type, ==, - BF32_GET(amse->amse_u.amse_type, 0, 8)); + BF32_SET(*amse, 0, AME_TYPE_BITS, AMET_SKIP); } static inline void amse_set_skip_count(anyraid_map_skip_entry_t *amse, uint32_t skip_count) { - BF32_SET(amse->amse_u.amse_skip_count, 8, AMSE_TILE_BITS, skip_count); + BF32_SET(*amse, AME_TYPE_BITS, AMSE_TILE_BITS, skip_count); } static inline uint32_t amse_get_skip_count(anyraid_map_skip_entry_t *amse) { - return (BF32_GET(amse->amse_u.amse_skip_count, 8, AMSE_TILE_BITS)); + return 
(BF32_GET(*amse, AME_TYPE_BITS, AMSE_TILE_BITS)); } /* @@ -196,12 +191,42 @@ amse_get_skip_count(anyraid_map_skip_entry_t *amse) * Location entry definitions and functions * ========================================================================== */ -typedef struct anyraid_map_loc_entry { - uint8_t amle_type; - uint8_t amle_disk; - uint16_t amle_offset; -} anyraid_map_loc_entry_t; -_Static_assert(sizeof (anyraid_map_loc_entry_t) == sizeof (uint32_t), ""); +typedef uint32_t anyraid_map_loc_entry_t; + +#define AMLE_DISK_BITS 8 +#define AMLE_OFFSET_BITS 16 + +static inline void +amle_set_type(anyraid_map_loc_entry_t *amle) +{ + BF32_SET(*amle, 0, AME_TYPE_BITS, AMET_LOC); +} + +static inline void +amle_set_disk(anyraid_map_loc_entry_t *amle, uint8_t disk) +{ + BF32_SET(*amle, AME_TYPE_BITS, AMLE_DISK_BITS, disk); +} + +static inline uint32_t +amle_get_disk(anyraid_map_loc_entry_t *amle) +{ + return (BF32_GET(*amle, AME_TYPE_BITS, AMLE_DISK_BITS)); +} + +static inline void +amle_set_offset(anyraid_map_loc_entry_t *amle, uint8_t offset) +{ + BF32_SET(*amle, (AME_TYPE_BITS + AMLE_DISK_BITS), AMLE_OFFSET_BITS, + offset); +} + +static inline uint32_t +amle_get_offset(anyraid_map_loc_entry_t *amle) +{ + return (BF32_GET(*amle, (AME_TYPE_BITS + AMLE_DISK_BITS), + AMLE_OFFSET_BITS)); +} /* * ========================================================================== @@ -216,28 +241,10 @@ typedef struct anyraid_map_entry { } ame_u; } anyraid_map_entry_t; -static inline void -ame_byteswap(anyraid_map_entry_t *ame) +static inline anyraid_map_entry_type_t +ame_get_type(anyraid_map_entry_t *ame) { - uint8_t type = ame->ame_u.ame_amle.amle_type; - switch (type) { - case AMET_SKIP: { - anyraid_map_skip_entry_t *amse = - &ame->ame_u.ame_amse; - amse->amse_u.amse_skip_count = - BSWAP_32(amse_get_skip_count(amse)) >> NBBY; - amse->amse_u.amse_type = AMET_SKIP; - break; - } - case AMET_LOC: { - anyraid_map_loc_entry_t *amle = - &ame->ame_u.ame_amle; - amle->amle_offset = BSWAP_16(amle->amle_offset); - break; - } - default: - PANIC("Invalid entry type %d", type); - } + return (BF32_GET(ame->ame_u.ame_amle, 0, AME_TYPE_BITS)); } #define VDEV_ANYRAID_MAX_DISKS (1 << 8) diff --git a/module/zfs/vdev_anyraid.c b/module/zfs/vdev_anyraid.c index aba02482570d..1746c207e967 100644 --- a/module/zfs/vdev_anyraid.c +++ b/module/zfs/vdev_anyraid.c @@ -238,8 +238,8 @@ static void create_tile_entry(vdev_anyraid_t *var, anyraid_map_loc_entry_t *amle, uint8_t *pat_cnt, anyraid_tile_t **out_ar, uint32_t *cur_tile) { - uint8_t disk = amle->amle_disk; - uint16_t offset = amle->amle_offset; + uint8_t disk = amle_get_disk(amle); + uint16_t offset = amle_get_offset(amle); anyraid_tile_t *ar = *out_ar; if (*pat_cnt == 0) { @@ -459,16 +459,6 @@ vdev_anyraid_pick_best_mapping(vdev_t *cvd, uint64_t *out_txg, return (error); } -#ifdef _ZFS_BIG_ENDIAN -static void -byteswap_map_buf(void *buf, uint32_t length) -{ - for (size_t i = 0; i < length; i += sizeof (anyraid_map_entry_t)) { - ame_byteswap((anyraid_map_entry_t *)((char *)buf + i)); - } -} -#endif - static int anyraid_open_existing(vdev_t *vd, uint64_t child, uint16_t **child_capacities) { @@ -606,13 +596,13 @@ anyraid_open_existing(vdev_t *vd, uint64_t child, uint16_t **child_capacities) #ifdef _ZFS_BIG_ENDIAN uint32_t length = map_length - next_map * SPA_MAXBLOCKSIZE; - byteswap_map_buf(map_buf, (uint32_t)(length < - SPA_MAXBLOCKSIZE ? 
length : SPA_MAXBLOCKSIZE)); + byteswap_uint32_array(map_buf, MIN(length, + SPA_MAXBLOCKSIZE)); #endif } anyraid_map_entry_t *entry = (anyraid_map_entry_t *)(map_buf + (off % SPA_MAXBLOCKSIZE)); - uint8_t type = entry->ame_u.ame_amle.amle_type; + uint8_t type = ame_get_type(entry); switch (type) { case AMET_SKIP: { anyraid_map_skip_entry_t *amse = @@ -1236,9 +1226,9 @@ static boolean_t map_write_loc_entry(anyraid_tile_node_t *arn, void *buf, uint32_t *offset) { anyraid_map_loc_entry_t *entry = (void *)((char *)buf + *offset); - entry->amle_type = AMET_LOC; - entry->amle_disk = arn->atn_disk; - entry->amle_offset = arn->atn_offset; + amle_set_type(entry); + amle_set_disk(entry, arn->atn_disk); + amle_set_offset(entry, arn->atn_offset); *offset += sizeof (*entry); return (*offset == SPA_MAXBLOCKSIZE); } @@ -1267,7 +1257,7 @@ map_write_issue(zio_t *zio, vdev_t *vd, uint64_t base_offset, { #ifdef _ZFS_BIG_ENDIAN void *buf = abd_borrow_buf(abd, SPA_MAXBLOCKSIZE); - byteswap_map_buf(buf, length); + byteswap_uint32_array(buf, length); abd_return_buf(abd, buf, SPA_MAXBLOCKSIZE); #else (void) length; From 15d3be1a883e780474398ff0297808cc0d07676e Mon Sep 17 00:00:00 2001 From: Paul Dagnelie Date: Tue, 16 Sep 2025 09:43:58 -0700 Subject: [PATCH 13/21] Tony's feedback Signed-off-by: Paul Dagnelie --- cmd/zdb/zdb.c | 33 +++-- cmd/zpool/zpool_vdev.c | 14 +- cmd/ztest.c | 17 +-- include/sys/fs/zfs.h | 2 +- include/sys/vdev_anyraid.h | 2 +- include/sys/vdev_impl.h | 4 + lib/libzfs/libzfs_pool.c | 3 +- man/man4/zfs.4 | 9 ++ man/man7/vdevprops.7 | 15 ++ man/man7/zpoolconcepts.7 | 14 +- module/os/freebsd/zfs/vdev_geom.c | 1 + module/os/linux/zfs/vdev_disk.c | 1 + module/zcommon/zfs_namecheck.c | 3 +- module/zcommon/zpool_prop.c | 6 +- module/zfs/spa.c | 5 +- module/zfs/vdev.c | 19 ++- module/zfs/vdev_anyraid.c | 139 +++++++++--------- module/zfs/vdev_draid.c | 9 ++ module/zfs/vdev_file.c | 1 + module/zfs/vdev_indirect.c | 1 + module/zfs/vdev_mirror.c | 3 + module/zfs/vdev_missing.c | 2 + module/zfs/vdev_raidz.c | 8 + module/zfs/vdev_root.c | 1 + tests/runfiles/common.run | 5 +- tests/runfiles/sanity.run | 4 +- tests/zfs-tests/tests/Makefile.am | 2 + .../functional/anyraid/anyraid_checkpoint.ksh | 2 +- .../anyraid/anyraid_clean_mirror_001_pos.ksh | 4 +- .../anyraid/anyraid_clean_mirror_002_pos.ksh | 4 +- .../anyraid/anyraid_clean_mirror_003_pos.ksh | 2 +- ...nyraid_faildisk_write_replace_resilver.ksh | 4 +- .../anyraid_offline_write_online_resilver.ksh | 12 +- .../anyraid/anyraid_special_vdev_001_pos.ksh | 2 +- .../anyraid/anyraid_special_vdev_002_pos.ksh | 3 +- .../anyraid/anyraid_tile_layout.ksh | 4 +- .../cli_root/zpool_add/zpool_add_001_pos.ksh | 2 +- .../zpool_attach/zpool_attach_002_pos.ksh | 4 +- .../zpool_attach/zpool_attach_003_pos.ksh | 18 +-- .../zpool_create/zpool_create_001_pos.ksh | 10 +- .../zpool_create/zpool_create_005_pos.ksh | 2 +- .../zpool_create/zpool_create_006_pos.ksh | 18 +-- .../zpool_create/zpool_create_007_neg.ksh | 8 +- .../zpool_create/zpool_create_009_neg.ksh | 2 +- .../zpool_create/zpool_create_010_neg.ksh | 2 +- .../zpool_create_anyraid_001_pos.ksh | 12 +- .../zpool_create_anyraid_002_pos.ksh | 4 +- .../zpool_create_anyraid_003_pos.ksh | 2 +- .../zpool_create_anyraid_004_pos.ksh | 57 +++++++ .../zpool_create_anyraid_005_neg.ksh | 56 +++++++ .../zpool_export_anyraid_001_pos.ksh | 2 +- .../zpool_import/zpool_import_010_pos.ksh | 2 +- .../zpool_initialize_anyraid_attach.ksh | 4 +- ..._initialize_fault_export_import_online.ksh | 4 +- .../zpool_initialize_import_export.ksh | 4 +- 
...nitialize_offline_export_import_online.ksh | 2 +- .../zpool_initialize_online_offline.ksh | 2 +- .../zpool_initialize_start_and_cancel_neg.ksh | 4 +- .../zpool_initialize_start_and_cancel_pos.ksh | 2 +- .../zpool_initialize_uninit.ksh | 4 +- .../zpool_initialize_verify_checksums.ksh | 2 +- .../zpool_initialize_verify_initialized.ksh | 2 +- .../functional/fault/auto_spare_001_pos.ksh | 2 +- .../functional/fault/auto_spare_002_pos.ksh | 2 +- .../tests/functional/trim/autotrim_config.ksh | 2 +- .../functional/trim/autotrim_integrity.ksh | 2 +- .../trim/autotrim_trim_integrity.ksh | 2 +- .../tests/functional/trim/trim_config.ksh | 10 +- .../tests/functional/trim/trim_integrity.ksh | 2 +- 69 files changed, 411 insertions(+), 202 deletions(-) create mode 100755 tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_anyraid_004_pos.ksh create mode 100755 tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_anyraid_005_neg.ksh diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index fd40a36edd8a..631bf1d850e3 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -9503,7 +9503,7 @@ zdb_print_anyraid_tile_layout(vdev_t *vd) // Create and populate table with all the values we need to print. char ***table = malloc(sizeof (*table) * cols); for (int i = 0; i < cols; i++) { - table[i] = calloc(var->vd_children[i]->van_capacity, + table[i] = calloc(var->vd_children[i]->van_capacity + 1, sizeof (**table)); } @@ -9549,7 +9549,7 @@ zdb_print_anyraid_tile_layout(vdev_t *vd) for (int v = 0; v < cols; v++) { if (final[v]) { ASSERT3U(i, >=, - var->vd_children[v]->van_capacity); + var->vd_children[v]->van_capacity + 1); int extra_width = 0; if (v == 0 || !printed[v - 1]) extra_width++; @@ -9558,7 +9558,7 @@ zdb_print_anyraid_tile_layout(vdev_t *vd) printed[v] = B_FALSE; continue; } - if (i + 1 == var->vd_children[v]->van_capacity) + if (i + 1 == var->vd_children[v]->van_capacity + 1) final[v] = B_TRUE; if (v - 1 != last_printed) (void) printf("│"); @@ -9575,7 +9575,7 @@ zdb_print_anyraid_tile_layout(vdev_t *vd) } (void) printf("\n"); for (int i = 0; i < cols; i++) { - for (int j = 0; j < var->vd_children[i]->van_capacity; j++) + for (int j = 0; j < var->vd_children[i]->van_capacity + 1; j++) if (table[i][j]) free(table[i][j]); free(table[i]); @@ -9648,7 +9648,7 @@ print_anyraid_mapping(vdev_t *vd, int child, int mapping, &disk_id) != 0) (void) printf("No valid disk ID\n"); - (void) printf("version: %6d\ttile size: %8lx\ttxg: %lu\n", + (void) printf("version: %6d\ttile size: %#8lx\ttxg: %lu\n", version, tile_size, written_txg); (void) printf("map length: %6u\tdisk id: %3u\n", map_length, disk_id); @@ -9831,12 +9831,18 @@ zdb_dump_anyraid_map_vdev(vdev_t *vd, int verbosity) ASSERT3P(vd->vdev_ops, ==, &vdev_anyraid_ops); vdev_anyraid_t *var = vd->vdev_tsd; - (void) printf("\t%-5s%11llu %s %16llx\n", + (void) printf("\t%-5s%11llu %s %#16llx\n", "vdev", (u_longlong_t)vd->vdev_id, "tile_size", (u_longlong_t)var->vd_tile_size); - (void) printf("\t%-8s%8llu %-12s %10u\n", "tiles", - (u_longlong_t)avl_numnodes(&var->vd_tile_map), - "checkpoint tile", var->vd_checkpoint_tile); + (void) printf("\t%-8s%8llu", "tiles", + (u_longlong_t)avl_numnodes(&var->vd_tile_map)); + if (var->vd_checkpoint_tile != UINT32_MAX) { + (void) printf(". 
%-12s %10u\n", "checkpoint tile", + var->vd_checkpoint_tile); + } else { + (void) printf("\n"); + } + (void) printf("\t%16s %12s %13s\n", "----------------", "------------", "-------------"); @@ -9868,8 +9874,6 @@ zdb_dump_anyraid_map(char *vdev_str, spa_t *spa, int verbosity) { vdev_t *rvd, *vd; - (void) printf("\nAnyRAID tiles:\n"); - /* A specific vdev. */ if (vdev_str != NULL) { vd = zdb_vdev_lookup(spa->spa_root_vdev, vdev_str); @@ -9877,14 +9881,19 @@ zdb_dump_anyraid_map(char *vdev_str, spa_t *spa, int verbosity) (void) printf("Invalid vdev: %s\n", vdev_str); return (EINVAL); } - if (vd->vdev_ops != &vdev_anyraid_ops) { + if (vd->vdev_ops != &vdev_anyraid_ops && + (vd->vdev_parent == NULL || + (vd = vd->vdev_parent)->vdev_ops != &vdev_anyraid_ops)) { (void) printf("Not an anyraid vdev: %s\n", vdev_str); return (EINVAL); } + + (void) printf("\nAnyRAID tiles:\n"); zdb_dump_anyraid_map_vdev(vd, verbosity); return (0); } + (void) printf("\nAnyRAID tiles:\n"); /* All anyraid vdevs. */ rvd = spa->spa_root_vdev; for (uint64_t c = 0; c < rvd->vdev_children; c++) { diff --git a/cmd/zpool/zpool_vdev.c b/cmd/zpool/zpool_vdev.c index 8ab6f8dff069..6495d7592691 100644 --- a/cmd/zpool/zpool_vdev.c +++ b/cmd/zpool/zpool_vdev.c @@ -433,7 +433,7 @@ is_raidz_mirror(replication_level_t *a, replication_level_t *b, if ((strcmp(a->zprl_type, "raidz") == 0 || strcmp(a->zprl_type, "draid") == 0) && (strcmp(b->zprl_type, "mirror") == 0 || - strcmp(b->zprl_type, "anyraid") == 0)) { + strcmp(b->zprl_type, "anymirror") == 0)) { *raidz = a; *mirror = b; return (B_TRUE); @@ -529,11 +529,11 @@ get_replication(nvlist_t *nvroot, boolean_t fatal) rep.zprl_children = 0; if (strcmp(type, VDEV_TYPE_RAIDZ) == 0 || - strcmp(type, VDEV_TYPE_DRAID) == 0) { + strcmp(type, VDEV_TYPE_DRAID) == 0 || + strcmp(type, VDEV_TYPE_ANYRAID) == 0) { verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY, &rep.zprl_parity) == 0); - assert(rep.zprl_parity != 0); } else { rep.zprl_parity = 0; } @@ -745,7 +745,9 @@ get_replication(nvlist_t *nvroot, boolean_t fatal) rep.zprl_type); else return (NULL); - } else if (lastrep.zprl_children != rep.zprl_children) { + } else if (lastrep.zprl_children != + rep.zprl_children && strcmp(rep.zprl_type, + VDEV_TYPE_ANYRAID) != 0) { if (ret) free(ret); ret = NULL; @@ -1232,7 +1234,7 @@ get_parity(const char *type) parity = strtol(p, &end, 10); if (errno != 0 || *end != '\0' || parity < 0 || parity > VDEV_ANYRAID_MAXPARITY) { - return (0); + return (-1); } } } else if (strncmp(type, VDEV_TYPE_DRAID, @@ -1294,6 +1296,8 @@ is_grouping(const char *type, int *mindev, int *maxdev) if (strncmp(type, VDEV_TYPE_ANYRAID, strlen(VDEV_TYPE_ANYRAID)) == 0) { nparity = get_parity(type); + if (nparity < 0) + return (NULL); if (mindev != NULL) *mindev = nparity + 1; if (maxdev != NULL) diff --git a/cmd/ztest.c b/cmd/ztest.c index 9a6ba6072954..857ccadf2aa3 100644 --- a/cmd/ztest.c +++ b/cmd/ztest.c @@ -189,7 +189,7 @@ typedef struct ztest_shared_opts { int zo_raid_do_expand; int zo_raid_children; int zo_raid_parity; - char zo_raid_type[8]; + char zo_raid_type[16]; int zo_draid_data; int zo_draid_spares; int zo_datasets; @@ -773,7 +773,7 @@ static ztest_option_t option_table[] = { DEFAULT_RAID_CHILDREN, NULL}, { 'R', "raid-parity", "INTEGER", "Raid parity", DEFAULT_RAID_PARITY, NULL}, - { 'K', "raid-kind", "raidz|eraidz|draid|anyraid|random", "Raid kind", + { 'K', "raid-kind", "raidz|eraidz|draid|anymirror|random", "Raid kind", NO_DEFAULT, "random"}, { 'D', "draid-data", "INTEGER", "Number of draid data drives", 
DEFAULT_DRAID_DATA, NULL}, @@ -1134,7 +1134,7 @@ process_options(int argc, char **argv) raid_kind = "draid"; break; case 3: - raid_kind = "anyraid"; + raid_kind = "anymirror"; break; } @@ -1190,7 +1190,7 @@ process_options(int argc, char **argv) } else if (strcmp(raid_kind, "raidz") == 0) { zo->zo_raid_parity = MIN(zo->zo_raid_parity, zo->zo_raid_children - 1); - } else if (strcmp(raid_kind, "anyraid") == 0) { + } else if (strcmp(raid_kind, "anymirror") == 0) { uint64_t min_devsize; /* With fewer disks use 1G, otherwise 512M is OK */ @@ -3815,7 +3815,8 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id) if (ztest_opts.zo_raid_children > 1) { if (strcmp(oldvd->vdev_ops->vdev_op_type, "raidz") == 0) ASSERT3P(oldvd->vdev_ops, ==, &vdev_raidz_ops); - else if (strcmp(oldvd->vdev_ops->vdev_op_type, "anyraid") == 0) + else if (strcmp(oldvd->vdev_ops->vdev_op_type, "anymirror") == + 0) ASSERT3P(oldvd->vdev_ops, ==, &vdev_anyraid_ops); else ASSERT3P(oldvd->vdev_ops, ==, &vdev_draid_ops); @@ -3839,11 +3840,7 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id) } oldguid = oldvd->vdev_guid; - if (oldvd->vdev_ops != &vdev_anyraid_ops) - oldsize = vdev_get_min_asize(oldvd); - else - oldsize = oldvd->vdev_child[ - ztest_random(oldvd->vdev_children)]->vdev_asize; + oldsize = vdev_get_min_attach_size(oldvd); oldvd_is_log = oldvd->vdev_top->vdev_islog; oldvd_is_special = oldvd->vdev_top->vdev_alloc_bias == VDEV_BIAS_SPECIAL || diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index b41fb4aa3259..1ddedbc01d99 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -917,7 +917,7 @@ typedef struct zpool_load_policy { #define VDEV_TYPE_MIRROR "mirror" #define VDEV_TYPE_REPLACING "replacing" #define VDEV_TYPE_RAIDZ "raidz" -#define VDEV_TYPE_ANYRAID "anyraid" +#define VDEV_TYPE_ANYRAID "anymirror" #define VDEV_TYPE_DRAID "draid" #define VDEV_TYPE_DRAID_SPARE "dspare" #define VDEV_TYPE_DISK "disk" diff --git a/include/sys/vdev_anyraid.h b/include/sys/vdev_anyraid.h index c6afd1cdfe90..7c750adc6030 100644 --- a/include/sys/vdev_anyraid.h +++ b/include/sys/vdev_anyraid.h @@ -44,6 +44,7 @@ typedef struct vdev_anyraid_node { avl_node_t van_node; uint8_t van_id; uint16_t van_next_offset; + // Note: store capacity - 1 for rollover reasons uint16_t van_capacity; } vdev_anyraid_node_t; @@ -275,7 +276,6 @@ _Static_assert(VDEV_ANYRAID_MAP_SIZE % SPA_MAXBLOCKSIZE == 0, ""); void vdev_anyraid_write_map_sync(vdev_t *vd, zio_t *pio, uint64_t txg, uint64_t *good_writes, int flags, vdev_config_sync_status_t status); -uint64_t vdev_anyraid_min_newsize(vdev_t *vd, uint64_t ashift); void vdev_anyraid_expand(vdev_t *tvd, vdev_t *newvd); boolean_t vdev_anyraid_mapped(vdev_t *vd, uint64_t offset); diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h index f58683ecd9bf..8f1e0e197b74 100644 --- a/include/sys/vdev_impl.h +++ b/include/sys/vdev_impl.h @@ -71,6 +71,7 @@ typedef int vdev_open_func_t(vdev_t *vd, uint64_t *size, uint64_t *max_size, typedef void vdev_close_func_t(vdev_t *vd); typedef uint64_t vdev_asize_func_t(vdev_t *vd, uint64_t psize, uint64_t txg); typedef uint64_t vdev_min_asize_func_t(vdev_t *pvd, vdev_t *cvd); +typedef uint64_t vdev_min_attach_size_func_t(vdev_t *vd); typedef uint64_t vdev_min_alloc_func_t(vdev_t *vd); typedef void vdev_io_start_func_t(zio_t *zio); typedef void vdev_io_done_func_t(zio_t *zio); @@ -107,6 +108,7 @@ typedef const struct vdev_ops { vdev_asize_func_t *vdev_op_psize_to_asize; vdev_asize_func_t *vdev_op_asize_to_psize; vdev_min_asize_func_t 
*vdev_op_min_asize; + vdev_min_attach_size_func_t *vdev_op_min_attach_size; vdev_min_alloc_func_t *vdev_op_min_alloc; vdev_io_start_func_t *vdev_op_io_start; vdev_io_done_func_t *vdev_op_io_done; @@ -631,7 +633,9 @@ extern void vdev_default_xlate(vdev_t *vd, const zfs_range_seg64_t *logical_rs, extern uint64_t vdev_default_psize(vdev_t *vd, uint64_t asize, uint64_t txg); extern uint64_t vdev_default_asize(vdev_t *vd, uint64_t psize, uint64_t txg); extern uint64_t vdev_default_min_asize(vdev_t *pvd, vdev_t *cvd); +extern uint64_t vdev_default_min_attach_size(vdev_t *vd); extern uint64_t vdev_get_min_asize(vdev_t *vd); +extern uint64_t vdev_get_min_attach_size(vdev_t *vd); extern void vdev_set_min_asize(vdev_t *vd); extern uint64_t vdev_get_nparity(vdev_t *vd); extern uint64_t vdev_get_ndisks(vdev_t *vd); diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index 7074e0d442d2..a593891cfbac 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -1223,7 +1223,8 @@ zpool_name_valid(libzfs_handle_t *hdl, boolean_t isopen, const char *pool) strncmp(pool, "raidz", 5) == 0 || strncmp(pool, "draid", 5) == 0 || strncmp(pool, "spare", 5) == 0 || - strcmp(pool, "log") == 0)) { + strcmp(pool, "log") == 0 || + strncmp(pool, "anymirror", 9) == 0)) { if (hdl != NULL) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "name is reserved")); diff --git a/man/man4/zfs.4 b/man/man4/zfs.4 index 60ec56b4d1f6..da79854f0425 100644 --- a/man/man4/zfs.4 +++ b/man/man4/zfs.4 @@ -655,6 +655,16 @@ Logical ashift for file-based devices. .It Sy vdev_file_physical_ashift Ns = Ns Sy 9 Po 512 B Pc Pq u64 Physical ashift for file-based devices. . +.It Sy zfs_anyraid_min_tile_size Ns = Ns Sy 16 GiB Pq u64 +Minimum size of the tiles that anyraid will use to do its mapping. +Smaller tile sizes let data be spread more evenly across devices, and make +smaller devices use more of their capacity. +Larger tile sizes allow for larger disks to be used in the future, since a given +device can only store 16384 tiles. +At 16 GiB per tile, for example, a single device can map at most 256 TiB. +The minimum valid tile size is 16 MiB, since a metaslab always needs to be able +to fit in a single tile. +. .It Sy zap_iterate_prefetch Ns = Ns Sy 1 Ns | Ns 0 Pq int If set, when we start iterating over a ZAP object, prefetch the entire object (all leaf blocks). diff --git a/man/man7/vdevprops.7 b/man/man7/vdevprops.7 index b54abcd3ecc9..54f6abe71ef6 100644 --- a/man/man7/vdevprops.7 +++ b/man/man7/vdevprops.7 @@ -194,6 +194,21 @@ If this device should perform new allocations, used to disable a device when it is scheduled for later removal. See .Xr zpool-remove 8 . +.It anyraid_tile_capacity +Only valid for +.Sy AnyRAID +vdevs and their leaf vdevs. +The number of physical tiles that the vdev can hold. +.It anyraid_tile_count +Only valid for +.Sy AnyRAID +vdevs and their leaf vdevs. +The number of physical tiles that are currently allocated on the vdev. +.It anyraid_tile_size +Only valid for +.Sy AnyRAID +vdevs and their leaf vdevs. +The size of the tiles in use on this vdev. .El .Ss User Properties In addition to the standard native properties, ZFS supports arbitrary user diff --git a/man/man7/zpoolconcepts.7 b/man/man7/zpoolconcepts.7 index 08016194db8a..6a8bff5a6086 100644 --- a/man/man7/zpoolconcepts.7 +++ b/man/man7/zpoolconcepts.7 @@ -165,7 +165,7 @@ An error is returned when the provided number of children differs. The number of distributed hot spares. Defaults to zero.
.El -.It Sy anyraid , anyraid0 , anyraid1 , anyraid2 +.It Sy anymirror , anymirror0 , anymirror1 , anymirror2 A new device type that allows for mirror-parity redundancy while using devices of different sizes. An AnyRAID vdev works by dividing each of the underlying disks that make it up @@ -177,13 +177,17 @@ disks, while enabling maximum space usage by allocating more tiles from the disks with the most free space. In addition, the device can be expanded by attaching new disks, and new tiles will be allocated from those disks. -.Sy anyraid +The vdev class as a whole is referred to as AnyRAID; anymirror vdevs +specifically use mirror-style parity. +Future work will also add anyraidz, which will use the same basic tile +architecture, but use raidz-style parity. +.Sy anymirror is a synonym for -.Sy anyraid1 +.Sy anymirror1 , which is the 2-way mirror parity version (1 parity tile). -.Sy anyraid2 +.Sy anymirror2 is a 3-way mirror (2 parity tiles), while -.Sy anyraid0 +.Sy anymirror0 is striped (no parity tiles), and is primarily intended for testing. .It Sy spare A pseudo-vdev which keeps track of available hot spares for a pool. diff --git a/module/os/freebsd/zfs/vdev_geom.c b/module/os/freebsd/zfs/vdev_geom.c index bbd1dafc69be..c75f4443afd9 100644 --- a/module/os/freebsd/zfs/vdev_geom.c +++ b/module/os/freebsd/zfs/vdev_geom.c @@ -1288,6 +1288,7 @@ vdev_ops_t vdev_disk_ops = { .vdev_op_psize_to_asize = vdev_default_asize, .vdev_op_asize_to_psize = vdev_default_psize, .vdev_op_min_asize = vdev_default_min_asize, + .vdev_op_min_attach_size = vdev_default_min_attach_size, .vdev_op_min_alloc = NULL, .vdev_op_io_start = vdev_geom_io_start, .vdev_op_io_done = vdev_geom_io_done, diff --git a/module/os/linux/zfs/vdev_disk.c b/module/os/linux/zfs/vdev_disk.c index 1bd3500e9f66..b4259313822c 100644 --- a/module/os/linux/zfs/vdev_disk.c +++ b/module/os/linux/zfs/vdev_disk.c @@ -1290,6 +1290,7 @@ vdev_ops_t vdev_disk_ops = { .vdev_op_asize_to_psize = vdev_default_psize, .vdev_op_psize_to_asize = vdev_default_asize, .vdev_op_min_asize = vdev_default_min_asize, + .vdev_op_min_attach_size = vdev_default_min_attach_size, .vdev_op_min_alloc = NULL, .vdev_op_io_start = vdev_disk_io_start, .vdev_op_io_done = vdev_disk_io_done, diff --git a/module/zcommon/zfs_namecheck.c b/module/zcommon/zfs_namecheck.c index deb0547c1084..7770f44c083e 100644 --- a/module/zcommon/zfs_namecheck.c +++ b/module/zcommon/zfs_namecheck.c @@ -445,7 +445,8 @@ pool_namecheck(const char *pool, namecheck_err_t *why, char *what) if (strcmp(pool, "mirror") == 0 || strcmp(pool, "raidz") == 0 || - strcmp(pool, "draid") == 0) { + strcmp(pool, "draid") == 0 || + strcmp(pool, "anymirror") == 0) { if (why) *why = NAME_ERR_RESERVED; return (-1); diff --git a/module/zcommon/zpool_prop.c b/module/zcommon/zpool_prop.c index f841419a964b..8c3ac0ae0874 100644 --- a/module/zcommon/zpool_prop.c +++ b/module/zcommon/zpool_prop.c @@ -437,13 +437,13 @@ vdev_prop_init(void) PROP_READONLY, ZFS_TYPE_VDEV, "", "TRIMBYTE", B_FALSE, sfeatures); zprop_register_number(VDEV_PROP_ANYRAID_CAP_TILES, - "anyraid_region_capacity", 0, PROP_READONLY, ZFS_TYPE_VDEV, + "anyraid_tile_capacity", 0, PROP_READONLY, ZFS_TYPE_VDEV, "", "TILECAP", B_FALSE, sfeatures); zprop_register_number(VDEV_PROP_ANYRAID_NUM_TILES, - "anyraid_region_count", 0, PROP_READONLY, ZFS_TYPE_VDEV, + "anyraid_tile_count", 0, PROP_READONLY, ZFS_TYPE_VDEV, "", "NUMTILES", B_FALSE, sfeatures); zprop_register_number(VDEV_PROP_ANYRAID_TILE_SIZE, - "anyraid_region_size", 0, PROP_READONLY, ZFS_TYPE_VDEV, + 
"anyraid_tile_size", 0, PROP_READONLY, ZFS_TYPE_VDEV, "", "TILESIZE", B_FALSE, sfeatures); /* default numeric properties */ diff --git a/module/zfs/spa.c b/module/zfs/spa.c index f03fb68dc574..607fc82066b1 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -7949,10 +7949,7 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing, /* * Make sure the new device is big enough. */ - vdev_t *min_vdev = raidz ? oldvd->vdev_child[0] : oldvd; - if ((anyraid && newvd->vdev_asize < vdev_anyraid_min_newsize(min_vdev, - newvd->vdev_ashift)) || - (!anyraid && newvd->vdev_asize < vdev_get_min_asize(min_vdev))) + if (newvd->vdev_asize < vdev_get_min_attach_size(oldvd)) return (spa_vdev_exit(spa, newrootvd, txg, EOVERFLOW)); /* diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index a48db085878b..e30375626669 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -346,6 +346,21 @@ vdev_derive_alloc_bias(const char *bias) return (alloc_bias); } +uint64_t +vdev_default_min_attach_size(vdev_t *vd) +{ + return (vdev_get_min_asize(vd)); +} + +uint64_t +vdev_get_min_attach_size(vdev_t *vd) +{ + vdev_t *pvd = vd->vdev_parent; + if (vd == vd->vdev_top) + pvd = vd; + return (pvd->vdev_ops->vdev_op_min_attach_size(pvd)); +} + uint64_t vdev_default_psize(vdev_t *vd, uint64_t asize, uint64_t txg) { @@ -6732,13 +6747,13 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl) for (int i = 0; i < vd->vdev_children; i++) { total += var->vd_children[i] - ->van_capacity; + ->van_capacity + 1; } } else if (pvd && pvd->vdev_ops == &vdev_anyraid_ops) { vdev_anyraid_t *var = pvd->vdev_tsd; total = var->vd_children[vd->vdev_id] - ->van_capacity; + ->van_capacity + 1; } else { continue; } diff --git a/module/zfs/vdev_anyraid.c b/module/zfs/vdev_anyraid.c index 1746c207e967..3d87cfb923f4 100644 --- a/module/zfs/vdev_anyraid.c +++ b/module/zfs/vdev_anyraid.c @@ -157,7 +157,7 @@ vdev_anyraid_init(spa_t *spa, nvlist_t *nv, void **tsd) nvlist_t **child; int error = nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, &children); - if (error != 0 || children > UINT8_MAX) + if (error != 0 || children > VDEV_ANYRAID_MAX_DISKS) return (SET_ERROR(EINVAL)); uint64_t nparity; @@ -236,34 +236,34 @@ vdev_anyraid_config_generate(vdev_t *vd, nvlist_t *nv) */ static void create_tile_entry(vdev_anyraid_t *var, anyraid_map_loc_entry_t *amle, - uint8_t *pat_cnt, anyraid_tile_t **out_ar, uint32_t *cur_tile) + uint8_t *pat_cnt, anyraid_tile_t **out_at, uint32_t *cur_tile) { uint8_t disk = amle_get_disk(amle); uint16_t offset = amle_get_offset(amle); - anyraid_tile_t *ar = *out_ar; + anyraid_tile_t *at = *out_at; if (*pat_cnt == 0) { - ar = kmem_alloc(sizeof (*ar), KM_SLEEP); - ar->at_tile_id = *cur_tile; - avl_add(&var->vd_tile_map, ar); - list_create(&ar->at_list, + at = kmem_alloc(sizeof (*at), KM_SLEEP); + at->at_tile_id = *cur_tile; + avl_add(&var->vd_tile_map, at); + list_create(&at->at_list, sizeof (anyraid_tile_node_t), offsetof(anyraid_tile_node_t, atn_node)); (*cur_tile)++; } - anyraid_tile_node_t *arn = kmem_alloc(sizeof (*arn), KM_SLEEP); - arn->atn_disk = disk; - arn->atn_offset = offset; - list_insert_tail(&ar->at_list, arn); + anyraid_tile_node_t *atn = kmem_alloc(sizeof (*atn), KM_SLEEP); + atn->atn_disk = disk; + atn->atn_offset = offset; + list_insert_tail(&at->at_list, atn); *pat_cnt = (*pat_cnt + 1) % (var->vd_nparity + 1); vdev_anyraid_node_t *van = var->vd_children[disk]; avl_remove(&var->vd_children_tree, van); van->van_next_offset = MAX(van->van_next_offset, offset + 1); 
avl_add(&var->vd_children_tree, van); - *out_ar = ar; + *out_at = at; } static void @@ -576,7 +576,7 @@ anyraid_open_existing(vdev_t *vd, uint64_t child, uint16_t **child_capacities) uint32_t size = sizeof (anyraid_map_loc_entry_t); uint8_t *map_buf = NULL; uint8_t pat_cnt = 0; - anyraid_tile_t *ar = NULL; + anyraid_tile_t *at = NULL; for (uint32_t off = 0; off < map_length; off += size) { if (checkpoint_rb && cur_tile > var->vd_checkpoint_tile && pat_cnt == 0) @@ -614,7 +614,7 @@ anyraid_open_existing(vdev_t *vd, uint64_t child, uint16_t **child_capacities) case AMET_LOC: { anyraid_map_loc_entry_t *amle = &entry->ame_u.ame_amle; - create_tile_entry(var, amle, &pat_cnt, &ar, + create_tile_entry(var, amle, &pat_cnt, &at, &cur_tile); break; } @@ -693,7 +693,7 @@ anyraid_calculate_size(vdev_t *vd) * size, so we need a tile to hold at least enough to store a * max-size block, or we'll assert in that code. */ - if (var->vd_tile_size <= SPA_MAXBLOCKSIZE) + if (var->vd_tile_size < SPA_MAXBLOCKSIZE) return (SET_ERROR(ENOSPC)); return (0); } @@ -855,6 +855,8 @@ vdev_anyraid_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize, return (lasterror); } + uint64_t max_size = VDEV_ANYRAID_MAX_TPD * var->vd_tile_size; + /* * Calculate the number of tiles each child could fit, then use that * to calculate the asize and min_asize. @@ -867,16 +869,20 @@ vdev_anyraid_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize, uint64_t casize; if (cvd->vdev_open_error == 0) { vdev_set_min_asize(cvd); - casize = cvd->vdev_asize - - VDEV_ANYRAID_TOTAL_MAP_SIZE(cvd->vdev_ashift); + casize = MIN(max_size, cvd->vdev_asize - + VDEV_ANYRAID_TOTAL_MAP_SIZE(cvd->vdev_ashift)); } else { ASSERT(child_capacities); - casize = child_capacities[c] * var->vd_tile_size; + casize = (child_capacities[c] + 1) * var->vd_tile_size; } num_tiles[c] = casize / var->vd_tile_size; avl_remove(&var->vd_children_tree, var->vd_children[c]); - var->vd_children[c]->van_capacity = num_tiles[c]; + /* + * We store the capacity minus 1: a vdev can never have 0 tiles, + * but it can have 65536 of them (which would overflow a uint16_t).
+ */ + var->vd_children[c]->van_capacity = num_tiles[c] - 1; avl_add(&var->vd_children_tree, var->vd_children[c]); } *asize = calculate_asize(vd, num_tiles); @@ -886,10 +892,10 @@ vdev_anyraid_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize, uint64_t cmasize; if (cvd->vdev_open_error == 0) { - cmasize = cvd->vdev_max_asize - - VDEV_ANYRAID_TOTAL_MAP_SIZE(cvd->vdev_ashift); + cmasize = MIN(max_size, cvd->vdev_max_asize - + VDEV_ANYRAID_TOTAL_MAP_SIZE(cvd->vdev_ashift)); } else { - cmasize = child_capacities[c] * var->vd_tile_size; + cmasize = (child_capacities[c] + 1) * var->vd_tile_size; } num_tiles[c] = cmasize / var->vd_tile_size; @@ -966,19 +972,19 @@ vdev_anyraid_mirror_start(zio_t *zio, anyraid_tile_t *tile) B_FALSE); uint64_t rsize = var->vd_tile_size; - anyraid_tile_node_t *arn = list_head(&tile->at_list); + anyraid_tile_node_t *atn = list_head(&tile->at_list); for (int c = 0; c < mm->mm_children; c++) { - ASSERT(arn); + ASSERT(atn); mirror_child_t *mc = &mm->mm_child[c]; - mc->mc_vd = vd->vdev_child[arn->atn_disk]; + mc->mc_vd = vd->vdev_child[atn->atn_disk]; mc->mc_offset = VDEV_ANYRAID_TOTAL_MAP_SIZE(vd->vdev_ashift) + - arn->atn_offset * rsize + zio->io_offset % rsize; + atn->atn_offset * rsize + zio->io_offset % rsize; ASSERT3U(mc->mc_offset, <, mc->mc_vd->vdev_psize - VDEV_LABEL_END_SIZE); mm->mm_rebuilding = mc->mc_rebuilding = B_FALSE; - arn = list_next(&tile->at_list, arn); + atn = list_next(&tile->at_list, atn); } - ASSERT(arn == NULL); + ASSERT(atn == NULL); zio->io_vsd = mm; zio->io_vsd_ops = &vdev_mirror_vsd_ops; @@ -1052,12 +1058,12 @@ vdev_anyraid_io_start(zio_t *zio) vans[i] = avl_first(&var->vd_children_tree); avl_remove(&var->vd_children_tree, vans[i]); - anyraid_tile_node_t *arn = - kmem_alloc(sizeof (*arn), KM_SLEEP); - arn->atn_disk = vans[i]->van_id; - arn->atn_offset = + anyraid_tile_node_t *atn = + kmem_alloc(sizeof (*atn), KM_SLEEP); + atn->atn_disk = vans[i]->van_id; + atn->atn_offset = vans[i]->van_next_offset++; - list_insert_tail(&tile->at_list, arn); + list_insert_tail(&tile->at_list, atn); } for (int i = 0; i < width; i++) avl_add(&var->vd_children_tree, vans[i]); @@ -1076,9 +1082,9 @@ vdev_anyraid_io_start(zio_t *zio) return; } - anyraid_tile_node_t *arn = list_head(&tile->at_list); - vdev_t *cvd = vd->vdev_child[arn->atn_disk]; - uint64_t child_offset = arn->atn_offset * rsize + + anyraid_tile_node_t *atn = list_head(&tile->at_list); + vdev_t *cvd = vd->vdev_child[atn->atn_disk]; + uint64_t child_offset = atn->atn_offset * rsize + zio->io_offset % rsize; child_offset += VDEV_ANYRAID_TOTAL_MAP_SIZE(vd->vdev_ashift); @@ -1144,9 +1150,9 @@ vdev_anyraid_need_resilver(vdev_t *vd, const dva_t *dva, size_t psize, rw_exit(&var->vd_lock); ASSERT(tile); - for (anyraid_tile_node_t *arn = list_head(&tile->at_list); - arn != NULL; arn = list_next(&tile->at_list, arn)) { - vdev_t *cvd = vd->vdev_child[arn->atn_disk]; + for (anyraid_tile_node_t *atn = list_head(&tile->at_list); + atn != NULL; atn = list_next(&tile->at_list, atn)) { + vdev_t *cvd = vd->vdev_child[atn->atn_disk]; if (!vdev_dtl_empty(cvd, DTL_PARTIAL)) return (B_TRUE); @@ -1185,17 +1191,17 @@ vdev_anyraid_xlate(vdev_t *cvd, const zfs_range_seg64_t *logical_rs, physical_rs->rs_start = physical_rs->rs_end = 0; return; } - anyraid_tile_node_t *arn = list_head(&tile->at_list); - for (; arn != NULL; arn = list_next(&tile->at_list, arn)) - if (anyraidvd->vdev_child[arn->atn_disk] == cvd) + anyraid_tile_node_t *atn = list_head(&tile->at_list); + for (; atn != NULL; atn = list_next(&tile->at_list, atn)) 
+ if (anyraidvd->vdev_child[atn->atn_disk] == cvd) break; // The tile exists, but isn't stored on this child - if (arn == NULL) { + if (atn == NULL) { physical_rs->rs_start = physical_rs->rs_end = 0; return; } - uint64_t child_offset = arn->atn_offset * rsize + + uint64_t child_offset = atn->atn_offset * rsize + logical_rs->rs_start % rsize; child_offset += VDEV_ANYRAID_TOTAL_MAP_SIZE(anyraidvd->vdev_ashift); uint64_t size = logical_rs->rs_end - logical_rs->rs_start; @@ -1223,12 +1229,12 @@ vdev_anyraid_ndisks(vdev_t *vd) * Functions related to syncing out the tile map each TXG. */ static boolean_t -map_write_loc_entry(anyraid_tile_node_t *arn, void *buf, uint32_t *offset) +map_write_loc_entry(anyraid_tile_node_t *atn, void *buf, uint32_t *offset) { anyraid_map_loc_entry_t *entry = (void *)((char *)buf + *offset); amle_set_type(entry); - amle_set_disk(entry, arn->atn_disk); - amle_set_offset(entry, arn->atn_offset); + amle_set_disk(entry, atn->atn_disk); + amle_set_offset(entry, atn->atn_offset); *offset += sizeof (*entry); return (*offset == SPA_MAXBLOCKSIZE); } @@ -1361,9 +1367,9 @@ vdev_anyraid_write_map_sync(vdev_t *vd, zio_t *pio, uint64_t txg, status == VDEV_CONFIG_REWINDING_CHECKPOINT) { var->vd_checkpoint_tile = UINT32_MAX; } else if (status == VDEV_CONFIG_CREATING_CHECKPOINT) { - anyraid_tile_t *ar = avl_last(&var->vd_tile_map); - ASSERT(ar); - var->vd_checkpoint_tile = ar->at_tile_id; + anyraid_tile_t *at = avl_last(&var->vd_tile_map); + ASSERT(at); + var->vd_checkpoint_tile = at->at_tile_id; } rw_exit(&var->vd_lock); @@ -1417,6 +1423,17 @@ vdev_anyraid_write_map_sync(vdev_t *vd, zio_t *pio, uint64_t txg, abd_free(header_abd); } +static uint64_t +vdev_anyraid_min_attach_size(vdev_t *vd) +{ + ASSERT3P(vd->vdev_ops, ==, &vdev_anyraid_ops); + ASSERT3U(spa_config_held(vd->vdev_spa, SCL_ALL, RW_READER), !=, 0); + vdev_anyraid_t *var = vd->vdev_tsd; + ASSERT(var->vd_tile_size); + return (VDEV_ANYRAID_TOTAL_MAP_SIZE(vd->vdev_ashift) + + var->vd_tile_size); +} + static uint64_t vdev_anyraid_min_asize(vdev_t *pvd, vdev_t *cvd) { @@ -1428,24 +1445,12 @@ vdev_anyraid_min_asize(vdev_t *pvd, vdev_t *cvd) rw_enter(&var->vd_lock, RW_READER); uint64_t size = VDEV_ANYRAID_TOTAL_MAP_SIZE(cvd->vdev_ashift) + - var->vd_children[cvd->vdev_id]->van_next_offset * + (var->vd_children[cvd->vdev_id]->van_capacity + 1) * var->vd_tile_size; rw_exit(&var->vd_lock); return (size); } -/* - * Used by the attach logic to determine if a device is big enough to be - * usefully attached. 
- */ -uint64_t -vdev_anyraid_min_newsize(vdev_t *vd, uint64_t ashift) -{ - vdev_anyraid_t *var = vd->vdev_tsd; - return (VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE + - VDEV_ANYRAID_TOTAL_MAP_SIZE(ashift) + var->vd_tile_size); -} - void vdev_anyraid_expand(vdev_t *tvd, vdev_t *newvd) { @@ -1460,9 +1465,10 @@ vdev_anyraid_expand(vdev_t *tvd, vdev_t *newvd) KM_SLEEP); newchild->van_id = newvd->vdev_id; newchild->van_next_offset = 0; - newchild->van_capacity = (newvd->vdev_asize - - VDEV_ANYRAID_TOTAL_MAP_SIZE(newvd->vdev_ashift)) / - var->vd_tile_size; + uint64_t max_size = VDEV_ANYRAID_MAX_TPD * var->vd_tile_size; + newchild->van_capacity = (MIN(max_size, (newvd->vdev_asize - + VDEV_ANYRAID_TOTAL_MAP_SIZE(newvd->vdev_ashift))) / + var->vd_tile_size) - 1; rw_enter(&var->vd_lock, RW_WRITER); memcpy(nc, var->vd_children, old_children * sizeof (*nc)); kmem_free(var->vd_children, old_children * sizeof (*nc)); @@ -1520,6 +1526,7 @@ vdev_ops_t vdev_anyraid_ops = { .vdev_op_psize_to_asize = vdev_default_asize, .vdev_op_asize_to_psize = vdev_default_asize, .vdev_op_min_asize = vdev_anyraid_min_asize, + .vdev_op_min_attach_size = vdev_anyraid_min_attach_size, .vdev_op_min_alloc = NULL, .vdev_op_io_start = vdev_anyraid_io_start, .vdev_op_io_done = vdev_anyraid_io_done, diff --git a/module/zfs/vdev_draid.c b/module/zfs/vdev_draid.c index f410bfb012ef..857d76413166 100644 --- a/module/zfs/vdev_draid.c +++ b/module/zfs/vdev_draid.c @@ -1175,6 +1175,13 @@ vdev_draid_min_asize(vdev_t *pvd, vdev_t *cvd) (pvd->vdev_min_asize + vdc->vdc_ndisks - 1) / (vdc->vdc_ndisks)); } +static uint64_t +vdev_draid_min_attach_size(vdev_t *vd) +{ + ASSERT3U(vd->vdev_top, ==, vd); + return (vdev_draid_min_asize(vd, vd->vdev_child[0])); +} + /* * When using dRAID the minimum allocation size is determined by the number * of data disks in the redundancy group. Full stripes are always used. 
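As context for the ops-table changes above and below, here is a minimal standalone sketch of how the new vdev_op_min_attach_size hook is dispatched. The get_min_attach_size() body mirrors vdev_get_min_attach_size() from the vdev.c hunk earlier in this patch; the surrounding types and the sizes in main() are simplified stand-ins for illustration, not the real ZFS structures.

#include <stdint.h>
#include <stdio.h>

typedef struct vdev vdev_t;

typedef struct vdev_ops {
	uint64_t (*vdev_op_min_attach_size)(vdev_t *vd);
} vdev_ops_t;

struct vdev {
	const vdev_ops_t *vdev_ops;
	vdev_t *vdev_parent;
	vdev_t *vdev_top;
	uint64_t vdev_min_asize;	/* stand-in for vdev_get_min_asize() */
};

static uint64_t
default_min_attach_size(vdev_t *vd)
{
	/* The default policy matches the old vdev_get_min_asize() answer. */
	return (vd->vdev_min_asize);
}

static const vdev_ops_t default_ops = {
	.vdev_op_min_attach_size = default_min_attach_size,
};

static uint64_t
get_min_attach_size(vdev_t *vd)
{
	/* Leaves defer to their parent; a top-level vdev answers itself. */
	vdev_t *pvd = vd->vdev_parent;
	if (vd == vd->vdev_top)
		pvd = vd;
	return (pvd->vdev_ops->vdev_op_min_attach_size(pvd));
}

int
main(void)
{
	vdev_t top = { &default_ops, NULL, &top, 1ULL << 30 };
	vdev_t leaf = { &default_ops, &top, &top, 1ULL << 29 };

	/* Both queries resolve through the top-level vdev's policy. */
	printf("%llu %llu\n",
	    (unsigned long long)get_min_attach_size(&top),
	    (unsigned long long)get_min_attach_size(&leaf));
	return (0);
}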
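Similarly, several vdev_anyraid.c hunks above depend on the biased van_capacity encoding described by the new comment in vdev_anyraid_open(). The self-contained sketch below illustrates it; the helper names are hypothetical, and the 1 to 65536 range is inferred from the uint16_t field plus the minus-one bias, not stated by the patch itself.

#include <assert.h>
#include <stdint.h>

/*
 * van_capacity is a uint16_t and a child never holds zero tiles, so the
 * field stores (capacity - 1).  That reuses the otherwise-impossible zero
 * encoding and lets a capacity of 65536, which would overflow the raw
 * field, still fit.
 */
static uint16_t
tile_capacity_encode(uint32_t tiles)
{
	assert(tiles >= 1 && tiles <= 65536);
	return ((uint16_t)(tiles - 1));
}

static uint32_t
tile_capacity_decode(uint16_t raw)
{
	return ((uint32_t)raw + 1);
}

int
main(void)
{
	/* The extremes round-trip: 1 <-> 0 and 65536 <-> 65535. */
	assert(tile_capacity_decode(tile_capacity_encode(1)) == 1);
	assert(tile_capacity_decode(tile_capacity_encode(65536)) == 65536);
	return (0);
}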
@@ -2344,6 +2351,7 @@ vdev_ops_t vdev_draid_ops = { .vdev_op_psize_to_asize = vdev_draid_psize_to_asize, .vdev_op_asize_to_psize = vdev_draid_asize_to_psize, .vdev_op_min_asize = vdev_draid_min_asize, + .vdev_op_min_attach_size = vdev_draid_min_attach_size, .vdev_op_min_alloc = vdev_draid_min_alloc, .vdev_op_io_start = vdev_draid_io_start, .vdev_op_io_done = vdev_draid_io_done, @@ -2836,6 +2844,7 @@ vdev_ops_t vdev_draid_spare_ops = { .vdev_op_psize_to_asize = vdev_default_asize, .vdev_op_asize_to_psize = vdev_default_psize, .vdev_op_min_asize = vdev_default_min_asize, + .vdev_op_min_attach_size = vdev_default_min_attach_size, .vdev_op_min_alloc = NULL, .vdev_op_io_start = vdev_draid_spare_io_start, .vdev_op_io_done = vdev_draid_spare_io_done, diff --git a/module/zfs/vdev_file.c b/module/zfs/vdev_file.c index 20b4db65ec06..3cae11e436d0 100644 --- a/module/zfs/vdev_file.c +++ b/module/zfs/vdev_file.c @@ -317,6 +317,7 @@ vdev_ops_t vdev_file_ops = { .vdev_op_psize_to_asize = vdev_default_asize, .vdev_op_asize_to_psize = vdev_default_psize, .vdev_op_min_asize = vdev_default_min_asize, + .vdev_op_min_attach_size = vdev_default_min_attach_size, .vdev_op_min_alloc = NULL, .vdev_op_io_start = vdev_file_io_start, .vdev_op_io_done = vdev_file_io_done, diff --git a/module/zfs/vdev_indirect.c b/module/zfs/vdev_indirect.c index 7538f471e63c..5f2b55047149 100644 --- a/module/zfs/vdev_indirect.c +++ b/module/zfs/vdev_indirect.c @@ -1870,6 +1870,7 @@ vdev_ops_t vdev_indirect_ops = { .vdev_op_psize_to_asize = vdev_default_asize, .vdev_op_asize_to_psize = vdev_default_psize, .vdev_op_min_asize = vdev_default_min_asize, + .vdev_op_min_attach_size = vdev_default_min_attach_size, .vdev_op_min_alloc = NULL, .vdev_op_io_start = vdev_indirect_io_start, .vdev_op_io_done = vdev_indirect_io_done, diff --git a/module/zfs/vdev_mirror.c b/module/zfs/vdev_mirror.c index 7b645405ab82..8aeff63e1dbe 100644 --- a/module/zfs/vdev_mirror.c +++ b/module/zfs/vdev_mirror.c @@ -957,6 +957,7 @@ vdev_ops_t vdev_mirror_ops = { .vdev_op_psize_to_asize = vdev_default_asize, .vdev_op_asize_to_psize = vdev_default_psize, .vdev_op_min_asize = vdev_default_min_asize, + .vdev_op_min_attach_size = vdev_default_min_attach_size, .vdev_op_min_alloc = NULL, .vdev_op_io_start = vdev_mirror_io_start, .vdev_op_io_done = vdev_mirror_io_done, @@ -983,6 +984,7 @@ vdev_ops_t vdev_replacing_ops = { .vdev_op_psize_to_asize = vdev_default_asize, .vdev_op_asize_to_psize = vdev_default_psize, .vdev_op_min_asize = vdev_default_min_asize, + .vdev_op_min_attach_size = vdev_default_min_attach_size, .vdev_op_min_alloc = NULL, .vdev_op_io_start = vdev_mirror_io_start, .vdev_op_io_done = vdev_mirror_io_done, @@ -1009,6 +1011,7 @@ vdev_ops_t vdev_spare_ops = { .vdev_op_psize_to_asize = vdev_default_asize, .vdev_op_asize_to_psize = vdev_default_psize, .vdev_op_min_asize = vdev_default_min_asize, + .vdev_op_min_attach_size = vdev_default_min_attach_size, .vdev_op_min_alloc = NULL, .vdev_op_io_start = vdev_mirror_io_start, .vdev_op_io_done = vdev_mirror_io_done, diff --git a/module/zfs/vdev_missing.c b/module/zfs/vdev_missing.c index c62faef2d05c..ac6866bdcec0 100644 --- a/module/zfs/vdev_missing.c +++ b/module/zfs/vdev_missing.c @@ -88,6 +88,7 @@ vdev_ops_t vdev_missing_ops = { .vdev_op_psize_to_asize = vdev_default_asize, .vdev_op_asize_to_psize = vdev_default_psize, .vdev_op_min_asize = vdev_default_min_asize, + .vdev_op_min_attach_size = vdev_default_min_attach_size, .vdev_op_min_alloc = NULL, .vdev_op_io_start = vdev_missing_io_start, .vdev_op_io_done = 
vdev_missing_io_done, @@ -114,6 +115,7 @@ vdev_ops_t vdev_hole_ops = { .vdev_op_psize_to_asize = vdev_default_asize, .vdev_op_asize_to_psize = vdev_default_psize, .vdev_op_min_asize = vdev_default_min_asize, + .vdev_op_min_attach_size = vdev_default_min_attach_size, .vdev_op_min_alloc = NULL, .vdev_op_io_start = vdev_missing_io_start, .vdev_op_io_done = vdev_missing_io_done, diff --git a/module/zfs/vdev_raidz.c b/module/zfs/vdev_raidz.c index d4038982437d..18c6b9a33310 100644 --- a/module/zfs/vdev_raidz.c +++ b/module/zfs/vdev_raidz.c @@ -2339,6 +2339,13 @@ vdev_raidz_min_asize(vdev_t *pvd, vdev_t *cvd) pvd->vdev_children); } +static uint64_t +vdev_raidz_min_attach_size(vdev_t *vd) +{ + ASSERT3U(vd->vdev_top, ==, vd); + return (vdev_raidz_min_asize(vd, vd->vdev_child[0])); +} + /* * return B_TRUE if a read should be skipped due to being too slow. * @@ -5467,6 +5474,7 @@ vdev_ops_t vdev_raidz_ops = { .vdev_op_psize_to_asize = vdev_raidz_psize_to_asize, .vdev_op_asize_to_psize = vdev_raidz_asize_to_psize, .vdev_op_min_asize = vdev_raidz_min_asize, + .vdev_op_min_attach_size = vdev_raidz_min_attach_size, .vdev_op_min_alloc = NULL, .vdev_op_io_start = vdev_raidz_io_start, .vdev_op_io_done = vdev_raidz_io_done, diff --git a/module/zfs/vdev_root.c b/module/zfs/vdev_root.c index 21a81d6d25b9..55e059e1d8b0 100644 --- a/module/zfs/vdev_root.c +++ b/module/zfs/vdev_root.c @@ -150,6 +150,7 @@ vdev_ops_t vdev_root_ops = { .vdev_op_psize_to_asize = vdev_default_asize, .vdev_op_asize_to_psize = vdev_default_psize, .vdev_op_min_asize = vdev_default_min_asize, + .vdev_op_min_attach_size = vdev_default_min_attach_size, .vdev_op_min_alloc = NULL, .vdev_op_io_start = NULL, /* not applicable to the root */ .vdev_op_io_done = NULL, /* not applicable to the root */ diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run index c9c36b9e15b5..06b215f2ae91 100644 --- a/tests/runfiles/common.run +++ b/tests/runfiles/common.run @@ -428,8 +428,9 @@ tests = ['zpool_create_001_pos', 'zpool_create_002_pos', 'zpool_create_020_pos', 'zpool_create_021_pos', 'zpool_create_022_pos', 'zpool_create_023_neg', 'zpool_create_024_pos', 'zpool_create_anyraid_001_pos', 'zpool_create_anyraid_002_pos', - 'zpool_create_anyraid_003_pos', - 'zpool_create_encrypted', 'zpool_create_crypt_combos', + 'zpool_create_anyraid_003_pos', 'zpool_create_anyraid_004_pos', + 'zpool_create_anyraid_005_neg', 'zpool_create_encrypted', + 'zpool_create_crypt_combos', 'zpool_create_draid_001_pos', 'zpool_create_draid_002_pos', 'zpool_create_draid_003_pos', 'zpool_create_draid_004_pos', 'zpool_create_features_001_pos', 'zpool_create_features_002_pos', diff --git a/tests/runfiles/sanity.run b/tests/runfiles/sanity.run index 16a55a537712..35fd0b13b3c9 100644 --- a/tests/runfiles/sanity.run +++ b/tests/runfiles/sanity.run @@ -269,7 +269,9 @@ tests = ['zpool_create_001_pos', 'zpool_create_002_pos', 'zpool_create_012_neg', 'zpool_create_014_neg', 'zpool_create_015_neg', 'zpool_create_017_neg', 'zpool_create_018_pos', 'zpool_create_019_pos', 'zpool_create_020_pos', 'zpool_create_021_pos', 'zpool_create_022_pos', - 'zpool_create_encrypted', + 'zpool_create_anyraid_001_pos', 'zpool_create_anyraid_002_pos', + 'zpool_create_anyraid_003_pos', 'zpool_create_anyraid_004_pos', + 'zpool_create_anyraid_005_neg', 'zpool_create_encrypted', 'zpool_create_features_001_pos', 'zpool_create_features_002_pos', 'zpool_create_features_003_pos', 'zpool_create_features_004_neg', 'zpool_create_features_005_pos'] diff --git a/tests/zfs-tests/tests/Makefile.am 
b/tests/zfs-tests/tests/Makefile.am index c50c5c51677f..bf0b48c8724b 100644 --- a/tests/zfs-tests/tests/Makefile.am +++ b/tests/zfs-tests/tests/Makefile.am @@ -1088,6 +1088,8 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/cli_root/zpool_create/zpool_create_anyraid_001_pos.ksh \ functional/cli_root/zpool_create/zpool_create_anyraid_002_pos.ksh \ functional/cli_root/zpool_create/zpool_create_anyraid_003_pos.ksh \ + functional/cli_root/zpool_create/zpool_create_anyraid_004_pos.ksh \ + functional/cli_root/zpool_create/zpool_create_anyraid_005_neg.ksh \ functional/cli_root/zpool_create/zpool_create_crypt_combos.ksh \ functional/cli_root/zpool_create/zpool_create_draid_001_pos.ksh \ functional/cli_root/zpool_create/zpool_create_draid_002_pos.ksh \ diff --git a/tests/zfs-tests/tests/functional/anyraid/anyraid_checkpoint.ksh b/tests/zfs-tests/tests/functional/anyraid/anyraid_checkpoint.ksh index 26876894b004..5cafab5b3f06 100755 --- a/tests/zfs-tests/tests/functional/anyraid/anyraid_checkpoint.ksh +++ b/tests/zfs-tests/tests/functional/anyraid/anyraid_checkpoint.ksh @@ -47,7 +47,7 @@ cleanup() { log_onexit cleanup -log_must create_pool $TESTPOOL anyraid1 $DISKS +log_must create_pool $TESTPOOL anymirror1 $DISKS log_assert "Anyraid works correctly with checkpoints" diff --git a/tests/zfs-tests/tests/functional/anyraid/anyraid_clean_mirror_001_pos.ksh b/tests/zfs-tests/tests/functional/anyraid/anyraid_clean_mirror_001_pos.ksh index ce16d6db482d..a97621aab1ef 100755 --- a/tests/zfs-tests/tests/functional/anyraid/anyraid_clean_mirror_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/anyraid/anyraid_clean_mirror_001_pos.ksh @@ -43,11 +43,11 @@ log_assert "AnyRAID mirror1 can survive having 1 failed disk" log_must create_sparse_files "disk" 3 $DEVSIZE -clean_mirror_spec_cases "anyraid1 $disk0 $disk1" \ +clean_mirror_spec_cases "anymirror1 $disk0 $disk1" \ "$disk0" \ "$disk1" -clean_mirror_spec_cases "anyraid1 $disk0 $disk1 $disk2" \ +clean_mirror_spec_cases "anymirror1 $disk0 $disk1 $disk2" \ "$disk0" \ "$disk1" \ "$disk2" diff --git a/tests/zfs-tests/tests/functional/anyraid/anyraid_clean_mirror_002_pos.ksh b/tests/zfs-tests/tests/functional/anyraid/anyraid_clean_mirror_002_pos.ksh index 60f94cb99af2..2edbac7773c3 100755 --- a/tests/zfs-tests/tests/functional/anyraid/anyraid_clean_mirror_002_pos.ksh +++ b/tests/zfs-tests/tests/functional/anyraid/anyraid_clean_mirror_002_pos.ksh @@ -43,7 +43,7 @@ log_assert "AnyRAID mirror2 can survive having 1-2 failed disks" log_must create_sparse_files "disk" 4 $DEVSIZE -clean_mirror_spec_cases "anyraid2 $disk0 $disk1 $disk2" \ +clean_mirror_spec_cases "anymirror2 $disk0 $disk1 $disk2" \ "$disk0" \ "$disk1" \ "$disk2" \ @@ -51,7 +51,7 @@ clean_mirror_spec_cases "anyraid2 $disk0 $disk1 $disk2" \ "\"$disk0 $disk2\"" \ "\"$disk1 $disk2\"" -clean_mirror_spec_cases "anyraid2 $disk0 $disk1 $disk2 $disk3" \ +clean_mirror_spec_cases "anymirror2 $disk0 $disk1 $disk2 $disk3" \ "$disk0" \ "$disk1" \ "$disk2" \ diff --git a/tests/zfs-tests/tests/functional/anyraid/anyraid_clean_mirror_003_pos.ksh b/tests/zfs-tests/tests/functional/anyraid/anyraid_clean_mirror_003_pos.ksh index 85393052d861..05d6606db03c 100755 --- a/tests/zfs-tests/tests/functional/anyraid/anyraid_clean_mirror_003_pos.ksh +++ b/tests/zfs-tests/tests/functional/anyraid/anyraid_clean_mirror_003_pos.ksh @@ -43,7 +43,7 @@ log_assert "AnyRAID mirror3 can survive having 1-3 failed disks" log_must create_sparse_files "disk" 4 $DEVSIZE -clean_mirror_spec_cases "anyraid3 $disk0 $disk1 $disk2 $disk3" \ 
+clean_mirror_spec_cases "anymirror3 $disk0 $disk1 $disk2 $disk3" \ "$disk0" \ "$disk1" \ "$disk2" \ diff --git a/tests/zfs-tests/tests/functional/anyraid/anyraid_faildisk_write_replace_resilver.ksh b/tests/zfs-tests/tests/functional/anyraid/anyraid_faildisk_write_replace_resilver.ksh index 5227d480e14a..0fa16b3ce3b2 100755 --- a/tests/zfs-tests/tests/functional/anyraid/anyraid_faildisk_write_replace_resilver.ksh +++ b/tests/zfs-tests/tests/functional/anyraid/anyraid_faildisk_write_replace_resilver.ksh @@ -49,13 +49,13 @@ cleanup() { log_onexit cleanup -# anyraid1 +# anymirror1 for replace_flags in '' '-s'; do log_must create_sparse_files "disk" 3 $DEVSIZE log_must create_sparse_files "spare" 1 $DEVSIZE - log_must zpool create -f $TESTPOOL anyraid1 $disks + log_must zpool create -f $TESTPOOL anymirror1 $disks log_must zfs set primarycache=none $TESTPOOL # Write initial data diff --git a/tests/zfs-tests/tests/functional/anyraid/anyraid_offline_write_online_resilver.ksh b/tests/zfs-tests/tests/functional/anyraid/anyraid_offline_write_online_resilver.ksh index f19115b12e43..f36efc443f79 100755 --- a/tests/zfs-tests/tests/functional/anyraid/anyraid_offline_write_online_resilver.ksh +++ b/tests/zfs-tests/tests/functional/anyraid/anyraid_offline_write_online_resilver.ksh @@ -48,10 +48,10 @@ cleanup() { log_onexit cleanup -# anyraid1 +# anymirror1 log_must create_sparse_files "disk" 3 $DEVSIZE -log_must zpool create -f $TESTPOOL anyraid1 $disks +log_must zpool create -f $TESTPOOL anymirror1 $disks log_must zpool offline $TESTPOOL $disk0 log_must check_state $TESTPOOL $disk0 "offline" @@ -70,10 +70,10 @@ log_must check_state $TESTPOOL "" "online" log_must destroy_pool $TESTPOOL -# anyraid2 +# anymirror2 log_must create_sparse_files "disk" 5 $DEVSIZE -log_must zpool create -f $TESTPOOL anyraid2 $disks +log_must zpool create -f $TESTPOOL anymirror2 $disks log_must zpool offline $TESTPOOL $disk0 log_must zpool offline $TESTPOOL $disk1 @@ -96,10 +96,10 @@ log_must check_state $TESTPOOL "" "online" log_must destroy_pool $TESTPOOL -# anyraid3 +# anymirror3 log_must create_sparse_files "disk" 7 $DEVSIZE -log_must zpool create -f $TESTPOOL anyraid3 $disks +log_must zpool create -f $TESTPOOL anymirror3 $disks log_must zpool offline $TESTPOOL $disk0 log_must zpool offline $TESTPOOL $disk1 diff --git a/tests/zfs-tests/tests/functional/anyraid/anyraid_special_vdev_001_pos.ksh b/tests/zfs-tests/tests/functional/anyraid/anyraid_special_vdev_001_pos.ksh index c316ea1039c8..f481b300a4c2 100755 --- a/tests/zfs-tests/tests/functional/anyraid/anyraid_special_vdev_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/anyraid/anyraid_special_vdev_001_pos.ksh @@ -54,7 +54,7 @@ log_must create_sparse_files "sdisk" 2 $DEVSIZE typeset oldcksum typeset newcksum for parity in {0..3}; do - log_must zpool create -f $TESTPOOL anyraid$parity $disks special mirror $sdisks + log_must zpool create -f $TESTPOOL anymirror$parity $disks special mirror $sdisks log_must poolexists $TESTPOOL log_must dd if=/dev/urandom of=/$TESTPOOL/file.bin bs=1M count=128 diff --git a/tests/zfs-tests/tests/functional/anyraid/anyraid_special_vdev_002_pos.ksh b/tests/zfs-tests/tests/functional/anyraid/anyraid_special_vdev_002_pos.ksh index 6adea12dbfcc..c00b26d37f2c 100755 --- a/tests/zfs-tests/tests/functional/anyraid/anyraid_special_vdev_002_pos.ksh +++ b/tests/zfs-tests/tests/functional/anyraid/anyraid_special_vdev_002_pos.ksh @@ -54,7 +54,8 @@ log_must create_sparse_files "sdisk" 4 $DEVSIZE typeset oldcksum typeset newcksum for parity in {0..3}; do - 
log_must zpool create $TESTPOOL anyraid$parity $disks special anyraid$parity $sdisks + log_must zpool create $TESTPOOL anymirror$parity $disks special \ + anymirror$parity $sdisks log_must poolexists $TESTPOOL log_must dd if=/dev/urandom of=/$TESTPOOL/file.bin bs=1M count=128 diff --git a/tests/zfs-tests/tests/functional/anyraid/anyraid_tile_layout.ksh b/tests/zfs-tests/tests/functional/anyraid/anyraid_tile_layout.ksh index 89f6679353a5..a405745e493c 100755 --- a/tests/zfs-tests/tests/functional/anyraid/anyraid_tile_layout.ksh +++ b/tests/zfs-tests/tests/functional/anyraid/anyraid_tile_layout.ksh @@ -32,7 +32,7 @@ # Anyraid disks intelligently select which tiles to use # # STRATEGY: -# 1. Create an anyraid1 vdev with 1 large disk and 2 small disks +# 1. Create an anymirror1 vdev with 1 large disk and 2 small disks # 2. Verify that the full space can be used # @@ -54,7 +54,7 @@ set_tunable64 ANYRAID_MIN_TILE_SIZE 67108864 log_assert "Anyraid disks intelligently select which tiles to use" -log_must create_pool $TESTPOOL2 anyraid1 /$TESTPOOL/vdev_file.{0,1,2,3} +log_must create_pool $TESTPOOL2 anymirror1 /$TESTPOOL/vdev_file.{0,1,2,3} cap=$(zpool get -Hp -o value size $TESTPOOL2) [[ "$cap" -eq $((9 * 64 * 1024 * 1024)) ]] || \ diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_001_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_001_pos.ksh index bd4bce221568..fadf79272fc8 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_001_pos.ksh @@ -55,7 +55,7 @@ log_assert "'zpool add ...' can add devices to the pool." log_onexit cleanup -set -A keywords "" "mirror" "raidz" "raidz1" "anyraid" "anyraid1" "anyraid2" "anyraid3" "draid:1s" "draid1:1s" "spare" +set -A keywords "" "mirror" "raidz" "raidz1" "anyraid" "anymirror1" "anymirror2" "anymirror3" "draid:1s" "draid1:1s" "spare" create_sparse_files "disk" 4 $MINVDEVSIZE2 create_sparse_files "extradisk" 4 $MINVDEVSIZE2 diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_attach/zpool_attach_002_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_attach/zpool_attach_002_pos.ksh index aae9a8605ff2..8ab23d7a3598 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_attach/zpool_attach_002_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_attach/zpool_attach_002_pos.ksh @@ -63,8 +63,8 @@ log_must zpool attach $TESTPOOL2 /$TESTPOOL/vdev_file.0 /$TESTPOOL/vdev_file.2 log_must eval "zpool list -v $TESTPOOL2 | grep \" .*_file.2\"" log_must zpool destroy $TESTPOOL2 -log_must create_pool $TESTPOOL2 anyraid1 /$TESTPOOL/vdev_file.{0,1,2} -log_must zpool attach $TESTPOOL2 anyraid-0 /$TESTPOOL/vdev_file.3 +log_must create_pool $TESTPOOL2 anymirror1 /$TESTPOOL/vdev_file.{0,1,2} +log_must zpool attach $TESTPOOL2 anymirror-0 /$TESTPOOL/vdev_file.3 log_must eval "zpool list -v $TESTPOOL2 | grep \" .*_file.3\"" log_pass "'zpool attach' works to expand mirrors and anyraid vdevs" diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_attach/zpool_attach_003_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_attach/zpool_attach_003_pos.ksh index 67ac4c2e7bb3..2287e8c83b99 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_attach/zpool_attach_003_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_attach/zpool_attach_003_pos.ksh @@ -32,12 +32,12 @@ # 'zpool attach' expands size correctly with anyraid vdevs. # # STRATEGY: -# 1. 
Create an anyraid1 vdev with small disks +# 1. Create an anymirror1 vdev with small disks # 2. Attach larger disk # 3. Verify that not all the new space can be used # 4. Attach another larger disk # 5. Verify that all space is now usable -# 6. Repeat steps 1-5 with anyraid2 +# 6. Repeat steps 1-5 with anymirror2 # verify_runnable "global" @@ -57,40 +57,40 @@ set_tunable64 ANYRAID_MIN_TILE_SIZE 67108864 log_assert "'zpool attach' expands size correctly with anyraid vdevs" -log_must create_pool $TESTPOOL2 anyraid1 /$TESTPOOL/vdev_file.{0,1,2} +log_must create_pool $TESTPOOL2 anymirror1 /$TESTPOOL/vdev_file.{0,1,2} cap=$(zpool get -Hp -o value size $TESTPOOL2) -log_must zpool attach $TESTPOOL2 anyraid1-0 /$TESTPOOL/vdev_file.4 +log_must zpool attach $TESTPOOL2 anymirror1-0 /$TESTPOOL/vdev_file.4 new_cap=$(zpool get -Hp -o value size $TESTPOOL2) new_cap=$((new_cap - cap)) [[ "$new_cap" -eq $((3 * 64 * 1024 * 1024)) ]] || \ log_fail "Incorrect space added on attach: $new_cap" -log_must zpool attach $TESTPOOL2 anyraid1-0 /$TESTPOOL/vdev_file.5 +log_must zpool attach $TESTPOOL2 anymirror1-0 /$TESTPOOL/vdev_file.5 new_cap=$(zpool get -Hp -o value size $TESTPOOL2) new_cap=$((new_cap - cap)) [[ "$new_cap" -eq $(((2048 - 256 - 64) * 1024 * 1024)) ]] || \ log_fail "Incorrect space added on attach: $new_cap" log_must zpool destroy $TESTPOOL2 -log_must create_pool $TESTPOOL2 anyraid2 /$TESTPOOL/vdev_file.{0,1,2,3} +log_must create_pool $TESTPOOL2 anymirror2 /$TESTPOOL/vdev_file.{0,1,2,3} cap=$(zpool get -Hp -o value size $TESTPOOL2) -log_must zpool attach $TESTPOOL2 anyraid2-0 /$TESTPOOL/vdev_file.4 +log_must zpool attach $TESTPOOL2 anymirror2-0 /$TESTPOOL/vdev_file.4 new_cap=$(zpool get -Hp -o value size $TESTPOOL2) new_cap=$((new_cap - cap)) [[ "$new_cap" -eq $((64 * 1024 * 1024)) ]] || \ log_fail "Incorrect space added on attach: $new_cap" -log_must zpool attach $TESTPOOL2 anyraid2-0 /$TESTPOOL/vdev_file.5 +log_must zpool attach $TESTPOOL2 anymirror2-0 /$TESTPOOL/vdev_file.5 new_cap=$(zpool get -Hp -o value size $TESTPOOL2) new_cap=$((new_cap - cap)) [[ "$new_cap" -eq $((256 * 1024 * 1024)) ]] || \ log_fail "Incorrect space added on attach: $new_cap" -log_must zpool attach $TESTPOOL2 anyraid2-0 /$TESTPOOL/vdev_file.6 +log_must zpool attach $TESTPOOL2 anymirror2-0 /$TESTPOOL/vdev_file.6 new_cap=$(zpool get -Hp -o value size $TESTPOOL2) new_cap=$((new_cap - cap)) [[ "$new_cap" -eq $(((2048 - 256 - 64) * 1024 * 1024)) ]] || \ diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_001_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_001_pos.ksh index 16a98864e138..879e38c5257b 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_001_pos.ksh @@ -73,11 +73,11 @@ create_pool_test "$TESTPOOL" "" "$pooldevs" create_pool_test "$TESTPOOL" "mirror" "$mirrordevs" create_pool_test "$TESTPOOL" "raidz" "$raidzdevs" create_pool_test "$TESTPOOL" "raidz1" "$raidzdevs" -create_pool_test "$TESTPOOL" "anyraid" "$anyraiddevs" -create_pool_test "$TESTPOOL" "anyraid0" "$anyraiddevs" -create_pool_test "$TESTPOOL" "anyraid1" "$anyraiddevs" -create_pool_test "$TESTPOOL" "anyraid2" "$anyraiddevs" -create_pool_test "$TESTPOOL" "anyraid3" "$anyraiddevs" +create_pool_test "$TESTPOOL" "anymirror" "$anyraiddevs" +create_pool_test "$TESTPOOL" "anymirror0" "$anyraiddevs" +create_pool_test "$TESTPOOL" "anymirror1" "$anyraiddevs" +create_pool_test "$TESTPOOL" "anymirror2" 
"$anyraiddevs" +create_pool_test "$TESTPOOL" "anymirror3" "$anyraiddevs" create_pool_test "$TESTPOOL" "draid" "$draiddevs" log_pass "'zpool create ...' success." diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_005_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_005_pos.ksh index b7f3041342ff..d0ac83c2b9f7 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_005_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_005_pos.ksh @@ -55,7 +55,7 @@ log_assert "'zpool create [-R root][-m mountpoint] ...' can create "an alternate pool or a new pool mounted at the specified mountpoint." log_onexit cleanup -set -A pooltype "" "mirror" "raidz" "raidz1" "raidz2" "anyraid" "anyraid0" "anyraid1" "anyraid2" "anyraid3" "draid" "draid2" +set -A pooltype "" "mirror" "raidz" "raidz1" "raidz2" "anymirror" "anymirror0" "anymirror1" "anymirror2" "anymirror3" "draid" "draid2" # # cleanup the pools created in previous case if zpool_create_004_pos timedout diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_006_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_006_pos.ksh index 5d15fec2707c..44bc6077c407 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_006_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_006_pos.ksh @@ -98,11 +98,12 @@ set -A valid_args \ "raidz2 $vdev0 $vdev1 $vdev2 spare $vdev3 raidz2 $vdev4 $vdev5 $vdev6" \ "raidz3 $vdev0 $vdev1 $vdev2 $vdev3 \ mirror $vdev4 $vdev5 $vdev6 $vdev7" \ - "anyraid0 $vdev0" \ - "anyraid0 $vdev0 $vdev1 anyraid0 $vdev2 $vdev3" \ - "anyraid1 $vdev0 $vdev1 anyraid1 $vdev2 $vdev3" \ - "anyraid2 $vdev0 $vdev1 $vdev2 anyraid2 $vdev3 $vdev4 $vdev5" \ - "anyraid3 $vdev0 $vdev1 $vdev2 $vdev3 anyraid3 $vdev4 $vdev5 $vdev6 $vdev7" \ + "anymirror0 $vdev0" \ + "anymirror0 $vdev0 $vdev1 anymirror0 $vdev2 $vdev3" \ + "anymirror1 $vdev0 $vdev1 anymirror1 $vdev2 $vdev3" \ + "anymirror2 $vdev0 $vdev1 $vdev2 anymirror2 $vdev3 $vdev4 $vdev5" \ + "anymirror2 $vdev0 $vdev1 $vdev2 $vdev3 anymirror2 $vdev4 $vdev5 $vdev6" \ + "anymirror3 $vdev0 $vdev1 $vdev2 $vdev3 anymirror3 $vdev4 $vdev5 $vdev6 $vdev7" \ "draid $vdev0 $vdev1 $vdev2 mirror $vdev3 $vdev4" \ "draid $vdev0 $vdev1 $vdev2 raidz1 $vdev3 $vdev4 $vdev5" \ "draid $vdev0 $vdev1 $vdev2 draid1 $vdev3 $vdev4 $vdev5" \ @@ -138,10 +139,9 @@ set -A forced_args \ spare $vdev4 raidz2 $vdev5 $vdev6 $vdev7" \ "mirror $vdev0 $vdev1 draid $vdev2 $vdev3 $vdev4 \ draid2 $vdev5 $vdev6 $vdev7 $vdev8 spare $vdev9" \ - "anyraid0 $vdev0 anyraid $vdev1 $vdev2" \ - "anyraid1 $vdev0 $vdev1 anyraid2 $vdev2 $vdev3 $vdev4" \ - "anyraid2 $vdev0 $vdev1 $vdev2 $vdev3 anyraid2 $vdev4 $vdev5 $vdev6" \ - "anyraid3 $vdev0 $vdev1 $vdev2 $vdev3 anyraid0 $vdev4" \ + "anymirror0 $vdev0 anymirror $vdev1 $vdev2" \ + "anymirror1 $vdev0 $vdev1 anymirror2 $vdev2 $vdev3 $vdev4" \ + "anymirror3 $vdev0 $vdev1 $vdev2 $vdev3 anymirror0 $vdev4" \ "draid $vdev0 $vdev1 $vdev2 $vdev3 \ draid2 $vdev4 $vdev5 $vdev6 $vdev7 $vdev8" \ "draid $vdev0 $vdev1 $vdev2 draid $vdev4 $vdev5 $vdev6 \ diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_007_neg.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_007_neg.ksh index 82bbf79441b2..94cfd98c7055 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_007_neg.ksh +++ 
b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_007_neg.ksh @@ -57,10 +57,10 @@ set -A args "" "-?" "-n" "-f" "-nf" "-fn" "-f -n" "--f" "-e" "-s" \ "$TESTPOOL raidz1" "$TESTPOOL mirror raidz1" \ "$TESTPOOL draid1" "$TESTPOOL mirror draid1" \ "$TESTPOOL anyraid" "$TESTPOOL mirror anyraid" \ - "$TESTPOOL anyraid0" "$TESTPOOL mirror anyraid0" \ - "$TESTPOOL anyraid1 $DISK0" \ - "$TESTPOOL anyraid2 $DISK0 $DISK1" \ - "$TESTPOOL anyraid3 $DISK0 $DISK1 $DISK2" \ + "$TESTPOOL anymirror0" "$TESTPOOL mirror anymirror0" \ + "$TESTPOOL anymirror1 $DISK0" \ + "$TESTPOOL anymirror2 $DISK0 $DISK1" \ + "$TESTPOOL anymirror3 $DISK0 $DISK1 $DISK2" \ "$TESTPOOL mirror c?t?d?" "$TESTPOOL mirror $DISK0 c0t1d?" \ "$TESTPOOL RAIDZ $DISK0 $DISK1" \ "$TESTPOOL $DISK0 log $DISK1 log $DISK2" \ diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_009_neg.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_009_neg.ksh index fce791caaf3d..1ebfd5bc8d16 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_009_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_009_neg.ksh @@ -68,7 +68,7 @@ create_sparse_files "file" 4 $MINVDEVSIZE2 unset NOINUSE_CHECK typeset opt -for opt in "" "mirror" "raidz" "anyraid" "draid"; do +for opt in "" "mirror" "raidz" "anymirror" "draid"; do if [[ $opt == "" ]]; then typeset disks=$file0 else diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_010_neg.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_010_neg.ksh index c08ba6afdaca..188e8768fda5 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_010_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_010_neg.ksh @@ -73,7 +73,7 @@ set -A args \ "$TOOSMALL $TESTDIR/file1" "$TESTPOOL1 $TESTDIR/file1 $TESTDIR/file2" \ "$TOOSMALL mirror $TESTDIR/file1 $TESTDIR/file2" \ "$TOOSMALL raidz $TESTDIR/file1 $TESTDIR/file2" \ - "$TOOSMALL anyraid0 $TESTDIR/file1" \ + "$TOOSMALL anymirror0 $TESTDIR/file1" \ "$TOOSMALL draid $TESTDIR/file1 $TESTDIR/file2 $TESTDIR/file3" typeset -i i=0 diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_anyraid_001_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_anyraid_001_pos.ksh index 544d5c715fe7..8b1ae0b23a82 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_anyraid_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_anyraid_001_pos.ksh @@ -32,8 +32,8 @@ # Create a variety of AnyRAID pools using the minimal vdev syntax. # # STRATEGY: -# 1. Create the required number of allowed AnyRAID vdevs. -# 2. Create few pools of various sizes using the anyraid* syntax. +# 1. Create the required number of allowed vdevs. +# 2. Create few pools of various sizes using the anymirror* syntax. # verify_runnable "global" @@ -43,21 +43,21 @@ function cleanup poolexists $TESTPOOL && destroy_pool $TESTPOOL } -log_assert "'zpool create ...' can create a pool." +log_assert "'zpool create ...' can create a pool." 
log_onexit cleanup create_sparse_files "disk" 4 $MINVDEVSIZE2 # Verify the default parity -log_must zpool create $TESTPOOL anyraid $disks +log_must zpool create $TESTPOOL anymirror $disks log_must poolexists $TESTPOOL destroy_pool $TESTPOOL # Verify specified parity for parity in {0..3}; do - log_must zpool create $TESTPOOL anyraid$parity $disks + log_must zpool create $TESTPOOL anymirror$parity $disks log_must poolexists $TESTPOOL destroy_pool $TESTPOOL done -log_pass "'zpool create ...' can create a pool." +log_pass "'zpool create ...' can create a pool." diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_anyraid_002_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_anyraid_002_pos.ksh index a5d0eb0928bd..6060b2bfb7d0 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_anyraid_002_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_anyraid_002_pos.ksh @@ -58,12 +58,12 @@ log_must truncate -s $MINVDEVSIZE2 $all_vdevs # Verify pool sizes from 254-255. for (( i=254; i<=255; i++ )); do - log_must zpool create $TESTPOOL anyraid3 \ + log_must zpool create $TESTPOOL anymirror3 \ $(echo $TESTDIR/file.{01..$i}) log_must destroy_pool $TESTPOOL done # Exceeds maximum AnyRAID vdev count (256). -log_mustnot zpool create $TESTPOOL anyraid3 $(echo $TESTDIR/file.{01..256}) +log_mustnot zpool create $TESTPOOL anymirror3 $(echo $TESTDIR/file.{01..256}) log_pass "'zpool create anyraid ...' can create a pool with maximum number of vdevs." diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_anyraid_003_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_anyraid_003_pos.ksh index 7ee7c304eb6f..6d292f9d420d 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_anyraid_003_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_anyraid_003_pos.ksh @@ -53,7 +53,7 @@ create_sparse_files "Cdisk" 1 $(( $MINVDEVSIZE2 * 3 )) ls -lh $Adisks $Bdisks $Cdisks for parity in {0..3}; do - log_must zpool create $TESTPOOL anyraid$parity $Cdisks $Adisks $Bdisks + log_must zpool create $TESTPOOL anymirror$parity $Cdisks $Adisks $Bdisks log_must poolexists $TESTPOOL destroy_pool $TESTPOOL done diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_anyraid_004_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_anyraid_004_pos.ksh new file mode 100755 index 000000000000..0cb3e106c1c1 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_anyraid_004_pos.ksh @@ -0,0 +1,57 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2025, Klara, Inc. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zpool_create/zpool_create.shlib + +# +# DESCRIPTION: +# Verify that AnyRAID vdevs with different disk counts can be mixed in a pool +# +# STRATEGY: +# 1. Create a pool with two anyraid vdevs with different disk counts +# 2. Verify the pool created successfully +# + +verify_runnable "global" + +function cleanup +{ + poolexists $TESTPOOL && destroy_pool $TESTPOOL +} + +log_assert "Pools can have multiple anyraid children with different disk counts" +log_onexit cleanup + +create_sparse_files "disk" 5 $MINVDEVSIZE2 + +# Create two default-parity anymirror vdevs with different disk counts +log_must zpool create $TESTPOOL anymirror $disk0 $disk1 $disk2 anymirror $disk3 $disk4 +log_must poolexists $TESTPOOL +destroy_pool $TESTPOOL + +log_pass "Pools can have multiple anyraid children with different disk counts." diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_anyraid_005_neg.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_anyraid_005_neg.ksh new file mode 100755 index 000000000000..7b34cac12f88 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_anyraid_005_neg.ksh @@ -0,0 +1,56 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2025, Klara, Inc. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# Run negative tests relating to anyraid vdevs and pool creation +# +# STRATEGY: +# 1. Try to create a pool with an invalid parity string +# 2.
Try to create a pool with too large a parity +# + +verify_runnable "global" + +function cleanup +{ + poolexists $TESTPOOL && destroy_pool $TESTPOOL +} + +log_assert "anyraid vdev specifications detect problems correctly" +log_onexit cleanup + +create_sparse_files "disk" 4 $MINVDEVSIZE2 + +log_mustnot zpool create $TESTPOOL anymirrorq $disks +log_mustnot zpool create $TESTPOOL anymirrorq1 $disks +log_mustnot zpool create $TESTPOOL anymirror-1 $disks +log_mustnot zpool create $TESTPOOL anymirror4 $disks + +log_pass "anyraid vdev specifications detect problems correctly" diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_export/zpool_export_anyraid_001_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_export/zpool_export_anyraid_001_pos.ksh index 8f3db4b3e424..7eabefc46dc9 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_export/zpool_export_anyraid_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_export/zpool_export_anyraid_001_pos.ksh @@ -51,7 +51,7 @@ poolexists $TESTPOOL && destroy_pool $TESTPOOL create_sparse_files "disk" 4 $MINVDEVSIZE2 -log_must zpool create $TESTPOOL anyraid3 $disks +log_must zpool create $TESTPOOL anymirror3 $disks log_must poolexists $TESTPOOL log_must zpool export $TESTPOOL diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_010_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_010_pos.ksh index 841b8693ec16..3fe1fea0bc3a 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_010_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_010_pos.ksh @@ -88,7 +88,7 @@ log_must zpool create $poolE $VDEV4 log_must zpool destroy $poolE truncate -s 24G $VDEV6 -log_must zpool create $poolF anyraid0 $VDEV6 +log_must zpool create $poolF anymirror0 $VDEV6 log_must zpool destroy $poolF log_must zpool import -d $DEVICE_DIR -D -f -a diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_anyraid_attach.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_anyraid_attach.ksh index 9c6959e913ef..dd4616670183 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_anyraid_attach.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_anyraid_attach.ksh @@ -41,13 +41,13 @@ DISK1="$(echo $DISKS | cut -d' ' -f1)" DISK2="$(echo $DISKS | cut -d' ' -f2)" DISK3="$(echo $DISKS | cut -d' ' -f3)" -log_must zpool create -f $TESTPOOL anyraid1 $DISK1 $DISK2 +log_must zpool create -f $TESTPOOL anymirror1 $DISK1 $DISK2 log_must zpool initialize $TESTPOOL $DISK1 progress="$(initialize_progress $TESTPOOL $DISK1)" [[ -z "$progress" ]] && log_fail "Initializing did not start" -log_must zpool attach $TESTPOOL anyraid1-0 $DISK3 +log_must zpool attach $TESTPOOL anymirror1-0 $DISK3 new_progress="$(initialize_progress $TESTPOOL $DISK1)" [[ "$progress" -le "$new_progress" ]] || \ log_fail "Lost initializing progress on AnyRAID1 attach" diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_fault_export_import_online.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_fault_export_import_online.ksh index 48a86d5f9400..3b24d36ab4d6 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_fault_export_import_online.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_fault_export_import_online.ksh @@ -36,9 
+36,9 @@ DISK1="$(echo $DISKS | cut -d' ' -f1)" DISK2="$(echo $DISKS | cut -d' ' -f2)" -for type in "mirror" "anyraid1"; do +for type in "mirror" "anymirror1"; do log_must zpool create -f $TESTPOOL $type $DISK1 $DISK2 - if [[ "$type" == "anyraid1" ]]; then + if [[ "$type" == "anymirror1" ]]; then log_must dd if=/dev/urandom of=/$TESTPOOL/f1 bs=1M count=2k log_must zpool sync log_must rm /$TESTPOOL/f1 diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_import_export.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_import_export.ksh index f8709875c757..7f386a9c9ec3 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_import_export.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_import_export.ksh @@ -46,10 +46,10 @@ DISK1="$(echo $DISKS | cut -d' ' -f1)" DISK2="$(echo $DISKS | cut -d' ' -f2)" -for type in "" "anyraid1"; do +for type in "" "anymirror1"; do if [[ "$type" = "" ]]; then VDEVS="$DISK1" - elif [[ "$type" = "anyraid1" ]]; then + elif [[ "$type" = "anymirror1" ]]; then VDEVS="$DISK1 $DISK2" fi diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_offline_export_import_online.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_offline_export_import_online.ksh index 419aea25c91b..5a5d3e18f6f0 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_offline_export_import_online.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_offline_export_import_online.ksh @@ -43,7 +43,7 @@ DISK1="$(echo $DISKS | cut -d' ' -f1)" DISK2="$(echo $DISKS | cut -d' ' -f2)" DISK3="$(echo $DISKS | cut -d' ' -f3)" -for type in "mirror" "anyraid1"; do +for type in "mirror" "anymirror1"; do if [[ "$type" =~ "anyraid" ]]; then export disks="$DISK1 $DISK2 $DISK3" diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_online_offline.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_online_offline.ksh index 15b8f32a8db3..6cd092bfc8f7 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_online_offline.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_online_offline.ksh @@ -49,7 +49,7 @@ DISK3="$(echo $DISKS | cut -d' ' -f3)" log_onexit_push zpool status -v -for type in "mirror" "anyraid1"; do +for type in "mirror" "anymirror1"; do if [[ "$type" == "mirror" ]]; then log_must zpool create -f $TESTPOOL $type $DISK1 $DISK2 diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_start_and_cancel_neg.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_start_and_cancel_neg.ksh index 0dad44cf54bd..8fdf60ac4287 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_start_and_cancel_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_start_and_cancel_neg.ksh @@ -44,11 +44,11 @@ DISK1=${DISKS%% *} DISK2="$(echo $DISKS | cut -d' ' -f2)" DISK3="$(echo $DISKS | cut -d' ' -f3)" -for type in "" "anyraid2"; do +for type in "" "anymirror2"; do log_must zpool list -v log_must zpool create -f $TESTPOOL $type $DISK1 $DISK2 $DISK3 - if [[ "$type" == "anyraid2" ]]; then + if [[ "$type" == "anymirror2" ]]; then log_must dd if=/dev/urandom of=/$TESTPOOL/f1 bs=1M count=2k log_must zpool sync log_must 
rm /$TESTPOOL/f1 diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_start_and_cancel_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_start_and_cancel_pos.ksh index a4a91e8d122d..65b56a067f0d 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_start_and_cancel_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_start_and_cancel_pos.ksh @@ -40,7 +40,7 @@ DISK1=${DISKS%% *} -for type in "" "anyraid0"; do +for type in "" "anymirror0"; do log_must zpool create -f $TESTPOOL $type $DISK1 log_must zpool initialize $TESTPOOL diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_uninit.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_uninit.ksh index 22cb4fbd792c..b417b51ade65 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_uninit.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_uninit.ksh @@ -79,12 +79,12 @@ function status_check_all # pool disk-state status_check "$pool" "$disk_state" "$disk_state" "$disk_state" } -for type in "" "anyraid1"; do +for type in "" "anymirror1"; do # 1. Create a one-disk pool. log_must zpool create -f $TESTPOOL $type $DISK1 $DISK2 $DISK3 status_check_all $TESTPOOL "uninitialized" - if [[ "$type" == "anyraid1" ]]; then + if [[ "$type" == "anymirror1" ]]; then log_must dd if=/dev/urandom of=/$TESTPOOL/f1 bs=1M count=2k log_must zpool sync log_must rm /$TESTPOOL/f1 diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_verify_checksums.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_verify_checksums.ksh index b5c2cda1ee1d..a25fabfaee7d 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_verify_checksums.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_verify_checksums.ksh @@ -42,7 +42,7 @@ DISK1=${DISKS%% *} -for type in "" "anyraid0"; do +for type in "" "anymirror0"; do log_must zpool create -f $TESTPOOL $type $DISK1 log_must dd if=/dev/urandom of=/$TESTPOOL/file1 bs=1M count=30 diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_verify_initialized.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_verify_initialized.ksh index 5cdf6d94e834..00a9f21896da 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_verify_initialized.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_verify_initialized.ksh @@ -60,7 +60,7 @@ log_must set_tunable64 INITIALIZE_VALUE $(printf %llu 0x$PATTERN) log_must mkdir "$TESTDIR" log_must truncate -s $MINVDEVSIZE "$SMALLFILE" -for type in "" "anyraid0"; do +for type in "" "anymirror0"; do log_must zpool create $TESTPOOL $type "$SMALLFILE" log_must zpool initialize -w $TESTPOOL diff --git a/tests/zfs-tests/tests/functional/fault/auto_spare_001_pos.ksh b/tests/zfs-tests/tests/functional/fault/auto_spare_001_pos.ksh index 2f28f4874a99..ba0faf92bfe6 100755 --- a/tests/zfs-tests/tests/functional/fault/auto_spare_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/fault/auto_spare_001_pos.ksh @@ -56,7 +56,7 @@ zed_events_drain TESTFILE="/$TESTPOOL/$TESTFS/testfile" -for type in "mirror" "raidz" "raidz2" "draid:1s" "anyraid1" "anyraid2" "anyraid3"; do +for type in "mirror" "raidz" 
"raidz2" "draid:1s" "anymirror1" "anymirror2" "anymirror3"; do if [[ "$type" =~ "anyraid" ]]; then export VDEVSIZE=1073741824 export TESTFILE_SIZE=268435456 diff --git a/tests/zfs-tests/tests/functional/fault/auto_spare_002_pos.ksh b/tests/zfs-tests/tests/functional/fault/auto_spare_002_pos.ksh index a1746757c299..d92336e7cb09 100755 --- a/tests/zfs-tests/tests/functional/fault/auto_spare_002_pos.ksh +++ b/tests/zfs-tests/tests/functional/fault/auto_spare_002_pos.ksh @@ -59,7 +59,7 @@ fi TESTFILE="/$TESTPOOL/$TESTFS/testfile" -for type in "mirror" "raidz" "raidz2" "anyraid1" "anyraid2" "anyraid3"; do +for type in "mirror" "raidz" "raidz2" "anymirror1" "anymirror2" "anymirror3"; do if [[ "$type" =~ "anyraid" ]]; then export VDEVSIZE=1073741824 export TESTFILE_SIZE=268435456 diff --git a/tests/zfs-tests/tests/functional/trim/autotrim_config.ksh b/tests/zfs-tests/tests/functional/trim/autotrim_config.ksh index a4c7a2ac49f6..36661cec167e 100755 --- a/tests/zfs-tests/tests/functional/trim/autotrim_config.ksh +++ b/tests/zfs-tests/tests/functional/trim/autotrim_config.ksh @@ -71,7 +71,7 @@ log_must set_tunable64 VDEV_MIN_MS_COUNT 32 typeset VDEV_MAX_MB=$(( floor(4 * MINVDEVSIZE * 0.75 / 1024 / 1024) )) typeset VDEV_MIN_MB=$(( floor(4 * MINVDEVSIZE * 0.30 / 1024 / 1024) )) -for type in "" "mirror" "anyraid0" "anyraid1" "anyraid2" "anyraid3" "raidz2" "draid"; do +for type in "" "mirror" "anymirror0" "anymirror1" "anymirror2" "anymirror3" "raidz2" "draid"; do if [[ "$type" = "" ]]; then VDEVS="$TRIM_VDEV1" diff --git a/tests/zfs-tests/tests/functional/trim/autotrim_integrity.ksh b/tests/zfs-tests/tests/functional/trim/autotrim_integrity.ksh index 99c5efd3a5a1..b6f4ade9b43a 100755 --- a/tests/zfs-tests/tests/functional/trim/autotrim_integrity.ksh +++ b/tests/zfs-tests/tests/functional/trim/autotrim_integrity.ksh @@ -61,7 +61,7 @@ log_must set_tunable64 TRIM_EXTENT_BYTES_MIN 512 typeset trim_txg_batch=$(get_tunable TRIM_TXG_BATCH) log_must set_tunable64 TRIM_TXG_BATCH 8 -for type in "" "mirror" "anyraid0" "anyraid1" "anyraid2" "anyraid3" "raidz" "draid"; do +for type in "" "mirror" "anymirror0" "anymirror1" "anymirror2" "anymirror3" "raidz" "draid"; do log_must truncate -s 1G $TRIM_VDEVS log_must zpool create -f $TESTPOOL $type $TRIM_VDEVS diff --git a/tests/zfs-tests/tests/functional/trim/autotrim_trim_integrity.ksh b/tests/zfs-tests/tests/functional/trim/autotrim_trim_integrity.ksh index b6ff889c0849..44d3690aae62 100755 --- a/tests/zfs-tests/tests/functional/trim/autotrim_trim_integrity.ksh +++ b/tests/zfs-tests/tests/functional/trim/autotrim_trim_integrity.ksh @@ -62,7 +62,7 @@ log_must set_tunable64 TRIM_EXTENT_BYTES_MIN 512 typeset trim_txg_batch=$(get_tunable TRIM_TXG_BATCH) log_must set_tunable64 TRIM_TXG_BATCH 8 -for type in "" "mirror" "anyraid0" "anyraid1" "anyraid2" "anyraid3" "raidz" "raidz2" "draid" "draid2"; do +for type in "" "mirror" "anymirror0" "anymirror1" "anymirror2" "anymirror3" "raidz" "raidz2" "draid" "draid2"; do log_must truncate -s 1G $TRIM_VDEVS log_must zpool create -f $TESTPOOL $type $TRIM_VDEVS diff --git a/tests/zfs-tests/tests/functional/trim/trim_config.ksh b/tests/zfs-tests/tests/functional/trim/trim_config.ksh index 266df6f41efe..7d22b6e5d4eb 100755 --- a/tests/zfs-tests/tests/functional/trim/trim_config.ksh +++ b/tests/zfs-tests/tests/functional/trim/trim_config.ksh @@ -71,19 +71,19 @@ log_must set_tunable64 VDEV_MIN_MS_COUNT 32 typeset VDEV_MAX_MB=$(( 4 * MINVDEVSIZE / 1024 / 1024 )) typeset VDEV_MIN_MB=0 -for type in "" "mirror" "anyraid0" "anyraid1" "anyraid2" 
"anyraid3" "raidz2" "draid"; do +for type in "" "mirror" "anymirror0" "anymirror1" "anymirror2" "anymirror3" "raidz2" "draid"; do if [[ "$type" = "" ]]; then VDEVS="$TRIM_VDEV1" elif [[ "$type" = "mirror" ]]; then VDEVS="$TRIM_VDEV1 $TRIM_VDEV2" - elif [[ "$type" = "anyraid0" ]]; then + elif [[ "$type" = "anymirror0" ]]; then VDEVS="$TRIM_VDEV1" - elif [[ "$type" = "anyraid1" ]]; then + elif [[ "$type" = "anymirror1" ]]; then VDEVS="$TRIM_VDEV1 $TRIM_VDEV2" - elif [[ "$type" = "anyraid2" ]]; then + elif [[ "$type" = "anymirror2" ]]; then VDEVS="$TRIM_VDEV1 $TRIM_VDEV2 $TRIM_VDEV3" - elif [[ "$type" = "anyraid3" ]]; then + elif [[ "$type" = "anymirror3" ]]; then VDEVS="$TRIM_VDEV1 $TRIM_VDEV2 $TRIM_VDEV3 $TRIM_VDEV4" elif [[ "$type" = "raidz2" ]]; then VDEVS="$TRIM_VDEV1 $TRIM_VDEV2 $TRIM_VDEV3" diff --git a/tests/zfs-tests/tests/functional/trim/trim_integrity.ksh b/tests/zfs-tests/tests/functional/trim/trim_integrity.ksh index 52ebbc797e38..edde3830d1b2 100755 --- a/tests/zfs-tests/tests/functional/trim/trim_integrity.ksh +++ b/tests/zfs-tests/tests/functional/trim/trim_integrity.ksh @@ -61,7 +61,7 @@ log_must set_tunable64 TRIM_EXTENT_BYTES_MIN 512 typeset trim_txg_batch=$(get_tunable TRIM_TXG_BATCH) log_must set_tunable64 TRIM_TXG_BATCH 8 -for type in "" "mirror" "anyraid0" "anyraid1" "anyraid2" "anyraid3" "raidz" "draid"; do +for type in "" "mirror" "anymirror0" "anymirror1" "anymirror2" "anymirror3" "raidz" "draid"; do log_must truncate -s 1G $TRIM_VDEVS log_must zpool create -f $TESTPOOL $type $TRIM_VDEVS From 8f6c4416fbed9f833db2fa8cdfd7c5d10b6a9f5f Mon Sep 17 00:00:00 2001 From: Paul Dagnelie Date: Fri, 3 Oct 2025 10:30:41 -0700 Subject: [PATCH 14/21] Fix test failures Signed-off-by: Paul Dagnelie --- .../tests/functional/cli_root/zpool_add/zpool_add_001_pos.ksh | 2 +- tests/zfs-tests/tests/functional/fault/auto_spare_001_pos.ksh | 2 +- tests/zfs-tests/tests/functional/trim/trim_config.ksh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_001_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_001_pos.ksh index fadf79272fc8..b082a57f114e 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_001_pos.ksh @@ -55,7 +55,7 @@ log_assert "'zpool add ...' can add devices to the pool." 
log_onexit cleanup -set -A keywords "" "mirror" "raidz" "raidz1" "anyraid" "anymirror1" "anymirror2" "anymirror3" "draid:1s" "draid1:1s" "spare" +set -A keywords "" "mirror" "raidz" "raidz1" "anymirror" "anymirror1" "anymirror2" "anymirror3" "draid:1s" "draid1:1s" "spare" create_sparse_files "disk" 4 $MINVDEVSIZE2 create_sparse_files "extradisk" 4 $MINVDEVSIZE2 diff --git a/tests/zfs-tests/tests/functional/fault/auto_spare_001_pos.ksh b/tests/zfs-tests/tests/functional/fault/auto_spare_001_pos.ksh index ba0faf92bfe6..95584af4927b 100755 --- a/tests/zfs-tests/tests/functional/fault/auto_spare_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/fault/auto_spare_001_pos.ksh @@ -57,7 +57,7 @@ zed_events_drain TESTFILE="/$TESTPOOL/$TESTFS/testfile" for type in "mirror" "raidz" "raidz2" "draid:1s" "anymirror1" "anymirror2" "anymirror3"; do - if [[ "$type" =~ "anyraid" ]]; then + if [[ "$type" =~ "anymirror" ]]; then export VDEVSIZE=1073741824 export TESTFILE_SIZE=268435456 else diff --git a/tests/zfs-tests/tests/functional/trim/trim_config.ksh b/tests/zfs-tests/tests/functional/trim/trim_config.ksh index 7d22b6e5d4eb..efce21a948e8 100755 --- a/tests/zfs-tests/tests/functional/trim/trim_config.ksh +++ b/tests/zfs-tests/tests/functional/trim/trim_config.ksh @@ -91,7 +91,7 @@ for type in "" "mirror" "anymirror0" "anymirror1" "anymirror2" "anymirror3" "rai VDEVS="$TRIM_VDEV1 $TRIM_VDEV2 $TRIM_VDEV3 $TRIM_VDEV4" fi - if [[ "$type" =~ "anyraid" ]]; then + if [[ "$type" =~ "anymirror" ]]; then # The AnyRAID VDEV takes some space for the mapping itself VDEV_MAX_MB=$(( floor(3 * MINVDEVSIZE * 0.75 / 1024 / 1024) )) VDEV_MIN_MB=$(( floor(3 * MINVDEVSIZE * 0.30 / 1024 / 1024) )) From c5e8df36c0a0d006e8d744f3b4a45fe1110fb449 Mon Sep 17 00:00:00 2001 From: Paul Dagnelie Date: Fri, 3 Oct 2025 13:51:45 -0700 Subject: [PATCH 15/21] fix printing layout Signed-off-by: Paul Dagnelie --- cmd/zdb/zdb.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index 631bf1d850e3..288d12ee71b1 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -9447,11 +9447,12 @@ static void print_separator_line(int cols, int colwidth, boolean_t *print, boolean_t *final) { char buf[64]; - ASSERT3U(colwidth, <, sizeof (buf) - 2); - int len = 0; + ASSERT3U(colwidth * strlen("─"), <, sizeof (buf) - 2); + int len = 0, off = 0; // Create a buffer with the cell separator to make later code simpler. 
 	while (len < colwidth) {
-		len += snprintf(buf + len, sizeof (buf) - len, "─");
+		len++;
+		off += snprintf(buf + off, sizeof (buf) - off, "─");
 	}
 
 	for (int i = 0; i < cols; i++) {

From 31ba656fde3f848266c181e1d930c4e6cfe9a3ca Mon Sep 17 00:00:00 2001
From: Paul Dagnelie
Date: Tue, 7 Oct 2025 15:38:32 -0700
Subject: [PATCH 16/21] Tony feedback

Signed-off-by: Paul Dagnelie
---
 cmd/zdb/zdb.c | 6 +++---
 include/sys/fs/zfs.h | 2 +-
 tests/runfiles/sanity.run | 11 +++++------
 tests/zfs-tests/include/libtest.shlib | 2 +-
 .../anyraid_faildisk_write_replace_resilver.ksh | 8 ++++----
 .../anyraid/anyraid_special_vdev_001_pos.ksh | 7 ++++++-
 .../functional/cli_root/zfs_mount/zfs_mount.kshlib | 2 +-
 .../cli_root/zpool_add/zpool_add_009_neg.ksh | 2 +-
 .../cli_root/zpool_create/zpool_create_007_neg.ksh | 2 +-
 .../zpool_create/zpool_create_anyraid_002_pos.ksh | 4 ++--
 .../zpool_initialize_fault_export_import_online.ksh | 4 ++--
 .../zpool_initialize_offline_export_import_online.ksh | 2 +-
 .../zpool_initialize_online_offline.ksh | 2 +-
 .../zpool_initialize_start_and_cancel_neg.ksh | 4 ++--
 .../zpool_initialize/zpool_initialize_uninit.ksh | 4 ++--
 .../tests/functional/fault/auto_spare_002_pos.ksh | 2 +-
 .../tests/functional/trim/autotrim_config.ksh | 2 +-
 17 files changed, 35 insertions(+), 31 deletions(-)

diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c
index 288d12ee71b1..7507bbb02ffc 100644
--- a/cmd/zdb/zdb.c
+++ b/cmd/zdb/zdb.c
@@ -9437,7 +9437,7 @@ dummy_get_file_info(dmu_object_type_t bonustype, const void *data,
 }
 
 static int
-log_10(uint64_t v) {
+numlen(uint64_t v) {
 	char buf[32];
 	snprintf(buf, sizeof (buf), "%llu", (u_longlong_t)v);
 	return (strlen(buf));
@@ -9497,8 +9497,8 @@ zdb_print_anyraid_tile_layout(vdev_t *vd)
 	ASSERT3P(vd->vdev_ops, ==, &vdev_anyraid_ops);
 	vdev_anyraid_t *var = vd->vdev_tsd;
 	int cols = vd->vdev_children;
-	int textwidth = MAX(8, log_10(avl_numnodes(&var->vd_tile_map)) +
-	    var->vd_nparity > 0 ? log_10(var->vd_nparity + 1) + 1 : 0);
+	int textwidth = MAX(8, numlen(avl_numnodes(&var->vd_tile_map)) +
+	    (var->vd_nparity > 0 ? numlen(var->vd_nparity + 1) + 1 : 0));
 	int colwidth = textwidth + 2;
 
 	// Create and populate table with all the values we need to print.
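
The separator glyph "─" (U+2500) is one display column but strlen("─") == 3 bytes in UTF-8, which is why print_separator_line() sizes its buffer in bytes while measuring the column in glyphs, tracking len (glyphs) and off (bytes) separately. A minimal standalone sketch of the same byte-versus-glyph arithmetic, independent of zdb and assuming a UTF-8 source encoding:

	#include <assert.h>
	#include <stdio.h>
	#include <string.h>

	int
	main(void)
	{
		char buf[64];
		int colwidth = 10;	/* glyphs per cell */

		/* Each "─" costs strlen("─") == 3 bytes in UTF-8. */
		assert(colwidth * strlen("─") < sizeof (buf) - 2);

		int len = 0, off = 0;	/* len counts glyphs, off counts bytes */
		while (len < colwidth) {
			len++;
			off += snprintf(buf + off, sizeof (buf) - off, "─");
		}
		printf("%d glyphs, %d bytes: %s\n", len, off, buf);
		return (0);
	}

With the earlier single counter, each iteration advanced len by three, so the separator came out at a third of the intended width; splitting the counters keeps the glyph count and the byte offset independent.
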
diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index 1ddedbc01d99..08f8cac2e470 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -911,7 +911,7 @@ typedef struct zpool_load_policy { #define ZPOOL_CONFIG_DRAID_NGROUPS "draid_ngroups" /* ANYRAID configuration */ -#define ZPOOL_CONFIG_ANYRAID_PARITY_TYPE "parity_type" +#define ZPOOL_CONFIG_ANYRAID_PARITY_TYPE "anyraid_parity_type" #define VDEV_TYPE_ROOT "root" #define VDEV_TYPE_MIRROR "mirror" diff --git a/tests/runfiles/sanity.run b/tests/runfiles/sanity.run index 35fd0b13b3c9..0a6cdebbb869 100644 --- a/tests/runfiles/sanity.run +++ b/tests/runfiles/sanity.run @@ -269,12 +269,11 @@ tests = ['zpool_create_001_pos', 'zpool_create_002_pos', 'zpool_create_012_neg', 'zpool_create_014_neg', 'zpool_create_015_neg', 'zpool_create_017_neg', 'zpool_create_018_pos', 'zpool_create_019_pos', 'zpool_create_020_pos', 'zpool_create_021_pos', 'zpool_create_022_pos', - 'zpool_create_anyraid_001_pos', 'zpool_create_anyraid_002_pos', - 'zpool_create_anyraid_003_pos', 'zpool_create_anyraid_004_pos', - 'zpool_create_anyraid_005_neg', 'zpool_create_encrypted', - 'zpool_create_features_001_pos', 'zpool_create_features_002_pos', - 'zpool_create_features_003_pos', 'zpool_create_features_004_neg', - 'zpool_create_features_005_pos'] + 'zpool_create_anyraid_001_pos', 'zpool_create_anyraid_003_pos', + 'zpool_create_anyraid_004_pos', 'zpool_create_anyraid_005_neg', + 'zpool_create_encrypted', 'zpool_create_features_001_pos', + 'zpool_create_features_002_pos', 'zpool_create_features_003_pos', + 'zpool_create_features_004_neg', 'zpool_create_features_005_pos'] tags = ['functional', 'cli_root', 'zpool_create'] [tests/functional/cli_root/zpool_destroy] diff --git a/tests/zfs-tests/include/libtest.shlib b/tests/zfs-tests/include/libtest.shlib index 9feef066fe91..85fd1869e2c7 100644 --- a/tests/zfs-tests/include/libtest.shlib +++ b/tests/zfs-tests/include/libtest.shlib @@ -1856,7 +1856,7 @@ function verify_pool function get_disklist # pool { echo $(zpool iostat -v $1 | awk '(NR > 4) {print $1}' | \ - grep -vEe '^-----' -e "^(mirror|raidz[1-3]|anyraid|draid[1-3]|spare|log|cache|special|dedup)|\-[0-9]$") + grep -vEe '^-----' -e "^(mirror|raidz[1-3]|anymirror|draid[1-3]|spare|log|cache|special|dedup)|\-[0-9]$") } # diff --git a/tests/zfs-tests/tests/functional/anyraid/anyraid_faildisk_write_replace_resilver.ksh b/tests/zfs-tests/tests/functional/anyraid/anyraid_faildisk_write_replace_resilver.ksh index 0fa16b3ce3b2..efa1d4e0a845 100755 --- a/tests/zfs-tests/tests/functional/anyraid/anyraid_faildisk_write_replace_resilver.ksh +++ b/tests/zfs-tests/tests/functional/anyraid/anyraid_faildisk_write_replace_resilver.ksh @@ -55,18 +55,18 @@ for replace_flags in '' '-s'; do log_must create_sparse_files "disk" 3 $DEVSIZE log_must create_sparse_files "spare" 1 $DEVSIZE - log_must zpool create -f $TESTPOOL anymirror1 $disks + log_must zpool create -O compress=off -f $TESTPOOL anymirror1 $disks log_must zfs set primarycache=none $TESTPOOL # Write initial data - log_must dd if=/dev/urandom of=/$TESTPOOL/file1.bin bs=1M count=$(( DEVSIZE / 2 / 1024 / 1024 )) + log_must file_write -o create -f /$TESTPOOL/file1.bin -b 1048576 -c 256 -d Z # Fail one disk log_must truncate -s0 $disk0 # Read initial data, write new data - dd if=/$TESTPOOL/file1.bin of=/dev/null bs=1M count=$(( DEVSIZE / 2 / 1024 / 1024 )) - log_must dd if=/dev/urandom of=/$TESTPOOL/file1.bin bs=1M count=$(( DEVSIZE / 2 / 1024 / 1024 )) + log_must dd if=/$TESTPOOL/file1.bin of=/dev/null bs=1M count=256 + 
log_must file_write -o create -f /$TESTPOOL/file1.bin -b 1048576 -c 256 -d Y # Check that disk is faulted zpool status diff --git a/tests/zfs-tests/tests/functional/anyraid/anyraid_special_vdev_001_pos.ksh b/tests/zfs-tests/tests/functional/anyraid/anyraid_special_vdev_001_pos.ksh index f481b300a4c2..c4e8728d1334 100755 --- a/tests/zfs-tests/tests/functional/anyraid/anyraid_special_vdev_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/anyraid/anyraid_special_vdev_001_pos.ksh @@ -56,15 +56,20 @@ typeset newcksum for parity in {0..3}; do log_must zpool create -f $TESTPOOL anymirror$parity $disks special mirror $sdisks log_must poolexists $TESTPOOL + log_must zfs set special_small_blocks=4k $TESTPOOL - log_must dd if=/dev/urandom of=/$TESTPOOL/file.bin bs=1M count=128 + log_must dd if=/dev/urandom of=/$TESTPOOL/file.bin bs=1M count=1 + log_must dd if=/dev/urandom of=/$TESTPOOL/small.bin bs=4k count=1 oldcksum=$(xxh128digest /$TESTPOOL/file.bin) + oldsmallcksum=$(xxh128digest /$TESTPOOL/small.bin) log_must zpool export $TESTPOOL log_must zpool import -d $(dirname $disk0) $TESTPOOL newcksum=$(xxh128digest /$TESTPOOL/file.bin) + newsmallcksum=$(xxh128digest /$TESTPOOL/small.bin) log_must test "$oldcksum" = "$newcksum" + log_must test "$oldsmallcksum" = "$newsmallcksum" log_must destroy_pool $TESTPOOL done diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount.kshlib b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount.kshlib index 7e447a2f7b81..05b087854627 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount.kshlib +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount.kshlib @@ -65,7 +65,7 @@ function setup_filesystem #disklist #pool #fs #mntpoint #type #vdev if [[ $vdev != "" && \ $vdev != "mirror" && \ $vdev != "raidz" && \ - $vdev != "anyraid" && \ + $vdev != "anymirror" && \ $vdev != "draid" ]] ; then log_note "Wrong vdev: (\"$vdev\")" diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_009_neg.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_009_neg.ksh index d43d9a0b3fa6..97749bf6f1c6 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_009_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_009_neg.ksh @@ -64,7 +64,7 @@ log_must poolexists $TESTPOOL log_mustnot zpool add -f $TESTPOOL $disk0 -for type in "" "mirror" "raidz" "anyraid" "draid" "spare" "log" "dedup" "special" "cache" +for type in "" "mirror" "raidz" "anymirror" "draid" "spare" "log" "dedup" "special" "cache" do log_mustnot zpool add -f $TESTPOOL $type $disk0 $disk1 log_mustnot zpool add --allow-in-use $TESTPOOL $type $disk0 $disk1 diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_007_neg.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_007_neg.ksh index 94cfd98c7055..9f88941932a7 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_007_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_007_neg.ksh @@ -56,7 +56,7 @@ set -A args "" "-?" 
"-n" "-f" "-nf" "-fn" "-f -n" "--f" "-e" "-s" \ "$TESTPOOL mirror" "$TESTPOOL raidz" "$TESTPOOL mirror raidz" \ "$TESTPOOL raidz1" "$TESTPOOL mirror raidz1" \ "$TESTPOOL draid1" "$TESTPOOL mirror draid1" \ - "$TESTPOOL anyraid" "$TESTPOOL mirror anyraid" \ + "$TESTPOOL anymirror" "$TESTPOOL mirror anymirror" \ "$TESTPOOL anymirror0" "$TESTPOOL mirror anymirror0" \ "$TESTPOOL anymirror1 $DISK0" \ "$TESTPOOL anymirror2 $DISK0 $DISK1" \ diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_anyraid_002_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_anyraid_002_pos.ksh index 6060b2bfb7d0..4e1d6cf682a2 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_anyraid_002_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_anyraid_002_pos.ksh @@ -44,8 +44,8 @@ function cleanup { poolexists $TESTPOOL && destroy_pool $TESTPOOL - rm -f $all_vdevs - rmdir $TESTDIR + log_pos rm -f $all_vdevs + log_pos rmdir $TESTDIR } log_assert "'zpool create anyraid ...' can create a pool with maximum number of vdevs." diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_fault_export_import_online.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_fault_export_import_online.ksh index 3b24d36ab4d6..c37cc2016eec 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_fault_export_import_online.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_fault_export_import_online.ksh @@ -37,9 +37,9 @@ DISK1="$(echo $DISKS | cut -d' ' -f1)" DISK2="$(echo $DISKS | cut -d' ' -f2)" for type in "mirror" "anymirror1"; do - log_must zpool create -f $TESTPOOL $type $DISK1 $DISK2 + log_must zpool create -O compress=off -f $TESTPOOL $type $DISK1 $DISK2 if [[ "$type" == "anymirror1" ]]; then - log_must dd if=/dev/urandom of=/$TESTPOOL/f1 bs=1M count=2k + log_must dd if=/dev/zero of=/$TESTPOOL/f1 bs=1M count=2k log_must zpool sync log_must rm /$TESTPOOL/f1 fi diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_offline_export_import_online.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_offline_export_import_online.ksh index 5a5d3e18f6f0..33c747edc6c7 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_offline_export_import_online.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_offline_export_import_online.ksh @@ -45,7 +45,7 @@ DISK3="$(echo $DISKS | cut -d' ' -f3)" for type in "mirror" "anymirror1"; do - if [[ "$type" =~ "anyraid" ]]; then + if [[ "$type" =~ "anymirror" ]]; then export disks="$DISK1 $DISK2 $DISK3" else export disks="$DISK1 $DISK2" diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_online_offline.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_online_offline.ksh index 6cd092bfc8f7..614fb1149425 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_online_offline.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_online_offline.ksh @@ -55,7 +55,7 @@ for type in "mirror" "anymirror1"; do log_must zpool create -f $TESTPOOL $type $DISK1 $DISK2 else log_must zpool create -f $TESTPOOL $type $DISK1 $DISK2 $DISK3 - log_must dd if=/dev/urandom of=/$TESTPOOL/f1 bs=1M count=400 + log_must file_write -o create 
-f /$TESTPOOL/f1 -b 1048576 -c 400 -d R log_must zpool sync log_must rm /$TESTPOOL/f1 log_must zpool sync diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_start_and_cancel_neg.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_start_and_cancel_neg.ksh index 8fdf60ac4287..3313a11e9f54 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_start_and_cancel_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_start_and_cancel_neg.ksh @@ -47,9 +47,9 @@ DISK3="$(echo $DISKS | cut -d' ' -f3)" for type in "" "anymirror2"; do log_must zpool list -v - log_must zpool create -f $TESTPOOL $type $DISK1 $DISK2 $DISK3 + log_must zpool create -O compress=off -f $TESTPOOL $type $DISK1 $DISK2 $DISK3 if [[ "$type" == "anymirror2" ]]; then - log_must dd if=/dev/urandom of=/$TESTPOOL/f1 bs=1M count=2k + log_must file_write -o create -f /$TESTPOOL/f1 -b 1048576 -c 2000 -d Z log_must zpool sync log_must rm /$TESTPOOL/f1 fi diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_uninit.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_uninit.ksh index b417b51ade65..2040ab42eba3 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_uninit.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_uninit.ksh @@ -82,10 +82,10 @@ function status_check_all # pool disk-state for type in "" "anymirror1"; do # 1. Create a one-disk pool. - log_must zpool create -f $TESTPOOL $type $DISK1 $DISK2 $DISK3 + log_must zpool create -O compress=off -f $TESTPOOL $type $DISK1 $DISK2 $DISK3 status_check_all $TESTPOOL "uninitialized" if [[ "$type" == "anymirror1" ]]; then - log_must dd if=/dev/urandom of=/$TESTPOOL/f1 bs=1M count=2k + log_must file_write -o create -f /$TESTPOOL/f1 -b 1048576 -c 2000 -d Z log_must zpool sync log_must rm /$TESTPOOL/f1 fi diff --git a/tests/zfs-tests/tests/functional/fault/auto_spare_002_pos.ksh b/tests/zfs-tests/tests/functional/fault/auto_spare_002_pos.ksh index d92336e7cb09..c5a092362dc9 100755 --- a/tests/zfs-tests/tests/functional/fault/auto_spare_002_pos.ksh +++ b/tests/zfs-tests/tests/functional/fault/auto_spare_002_pos.ksh @@ -60,7 +60,7 @@ fi TESTFILE="/$TESTPOOL/$TESTFS/testfile" for type in "mirror" "raidz" "raidz2" "anymirror1" "anymirror2" "anymirror3"; do - if [[ "$type" =~ "anyraid" ]]; then + if [[ "$type" =~ "anymirror" ]]; then export VDEVSIZE=1073741824 export TESTFILE_SIZE=268435456 else diff --git a/tests/zfs-tests/tests/functional/trim/autotrim_config.ksh b/tests/zfs-tests/tests/functional/trim/autotrim_config.ksh index 36661cec167e..03eb17d92bec 100755 --- a/tests/zfs-tests/tests/functional/trim/autotrim_config.ksh +++ b/tests/zfs-tests/tests/functional/trim/autotrim_config.ksh @@ -77,7 +77,7 @@ for type in "" "mirror" "anymirror0" "anymirror1" "anymirror2" "anymirror3" "rai VDEVS="$TRIM_VDEV1" elif [[ "$type" = "mirror" ]]; then VDEVS="$TRIM_VDEV1 $TRIM_VDEV2" - elif [[ "$type" =~ "anyraid" ]]; then + elif [[ "$type" =~ "anymirror" ]]; then VDEVS="$TRIM_VDEV1 $TRIM_VDEV2 $TRIM_VDEV3 $TRIM_VDEV4" elif [[ "$type" = "raidz2" ]]; then VDEVS="$TRIM_VDEV1 $TRIM_VDEV2 $TRIM_VDEV3" From 336a39e19878c04624db0a2208a809a2017b7645 Mon Sep 17 00:00:00 2001 From: Paul Dagnelie Date: Wed, 29 Oct 2025 10:48:45 -0700 Subject: [PATCH 17/21] Move two of the map copies to the end of the disk Signed-off-by: Paul Dagnelie --- 
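
With VDEV_ANYRAID_MAP_COPIES = 4, the change below places copies 0 and 1 just past the front vdev labels and copies 2 and 3 flush against the tail labels, presumably so that damage confined to either end of the disk still leaves intact copies of the map. A standalone sketch mirroring the placement arithmetic of vdev_anyraid_header_offset(), with invented sizes (the real label and map sizes come from the ZFS headers):

	#include <stdint.h>
	#include <stdio.h>

	/* Illustrative constants only. */
	#define	COPIES		4
	#define	START_COPIES	(COPIES / 2)
	#define	LABEL_START	(4ULL << 20)	/* assumed front-label region */
	#define	LABEL_END	(1ULL << 20)	/* assumed tail-label region */
	#define	MAP_SIZE	(8ULL << 20)	/* assumed per-copy map size */

	/*
	 * Same shape as vdev_anyraid_header_offset(): low ids go after the
	 * front labels, high ids end flush against the tail labels.
	 */
	static uint64_t
	header_offset(uint64_t psize, int id)
	{
		if (id < START_COPIES)
			return (LABEL_START + id * MAP_SIZE);
		return (psize - LABEL_END - (COPIES - id) * MAP_SIZE);
	}

	int
	main(void)
	{
		uint64_t psize = 1ULL << 40;	/* hypothetical 1 TiB disk */
		for (int id = 0; id < COPIES; id++)
			printf("copy %d -> offset %llu\n", id,
			    (unsigned long long)header_offset(psize, id));
		return (0);
	}
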
 include/sys/vdev_anyraid.h | 3 +++
 module/zfs/vdev_anyraid.c | 27 +++++++++++++++++----------
 2 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/include/sys/vdev_anyraid.h b/include/sys/vdev_anyraid.h
index 7c750adc6030..ebe715d46830 100644
--- a/include/sys/vdev_anyraid.h
+++ b/include/sys/vdev_anyraid.h
@@ -261,8 +261,11 @@ ame_get_type(anyraid_map_entry_t *ame)
 #define	VDEV_ANYRAID_SINGLE_MAP_SIZE(ashift) \
 	((VDEV_ANYRAID_MAP_HEADER_SIZE(ashift) + VDEV_ANYRAID_MAP_SIZE))
 #define	VDEV_ANYRAID_MAP_COPIES 4
+#define	VDEV_ANYRAID_START_COPIES (VDEV_ANYRAID_MAP_COPIES / 2)
 #define	VDEV_ANYRAID_TOTAL_MAP_SIZE(ashift) (VDEV_ANYRAID_MAP_COPIES * \
 	VDEV_ANYRAID_SINGLE_MAP_SIZE(ashift))
+#define	VDEV_ANYRAID_START_OFFSET(ashift) (VDEV_ANYRAID_START_COPIES * \
+	VDEV_ANYRAID_SINGLE_MAP_SIZE(ashift))
 
 _Static_assert(VDEV_ANYRAID_TOTAL_MAP_SIZE(9) % SPA_MINBLOCKSIZE == 0, "");
 _Static_assert(VDEV_ANYRAID_TOTAL_MAP_SIZE(12) % SPA_MINBLOCKSIZE == 0, "");
diff --git a/module/zfs/vdev_anyraid.c b/module/zfs/vdev_anyraid.c
index 3d87cfb923f4..28b9dee1a46f 100644
--- a/module/zfs/vdev_anyraid.c
+++ b/module/zfs/vdev_anyraid.c
@@ -125,6 +125,17 @@ uint64_t zfs_anyraid_min_tile_size = (16ULL << 30);
  */
 int anyraid_disk_shift = 6;
 
+static inline uint64_t
+vdev_anyraid_header_offset(vdev_t *vd, int id)
+{
+	uint64_t full_size = VDEV_ANYRAID_SINGLE_MAP_SIZE(vd->vdev_ashift);
+	if (id < VDEV_ANYRAID_START_COPIES)
+		return (VDEV_LABEL_START_SIZE + id * full_size);
+	else
+		return (vd->vdev_psize - VDEV_LABEL_END_SIZE -
+		    (VDEV_ANYRAID_MAP_COPIES - id) * full_size);
+}
+
 static inline int
 anyraid_tile_compare(const void *p1, const void *p2)
 {
@@ -311,8 +322,7 @@ vdev_anyraid_open_header(vdev_t *cvd, int header, anyraid_header_t *out_header)
 {
 	spa_t *spa = cvd->vdev_spa;
 	uint64_t ashift = cvd->vdev_ashift;
-	uint64_t header_offset = VDEV_LABEL_START_SIZE +
-	    header * VDEV_ANYRAID_SINGLE_MAP_SIZE(ashift);
+	uint64_t header_offset = vdev_anyraid_header_offset(cvd, header);
 	uint64_t header_size = VDEV_ANYRAID_MAP_HEADER_SIZE(ashift);
 	int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL |
 	    ZIO_FLAG_SPECULATIVE;
@@ -542,8 +552,7 @@ anyraid_open_existing(vdev_t *vd, uint64_t child, uint16_t **child_capacities)
 	 */
 	zio_t *rio = zio_root(spa, NULL, NULL, flags);
 	abd_t *map_abds[VDEV_ANYRAID_MAP_COPIES] = {0};
-	uint64_t header_offset = VDEV_LABEL_START_SIZE +
-	    mapping * VDEV_ANYRAID_SINGLE_MAP_SIZE(ashift);
+	uint64_t header_offset = vdev_anyraid_header_offset(cvd, mapping);
 	uint64_t map_offset = header_offset + header_size;
 	int i;
 	for (i = 0; i <= (map_length / SPA_MAXBLOCKSIZE); i++) {
@@ -977,7 +986,7 @@ vdev_anyraid_mirror_start(zio_t *zio, anyraid_tile_t *tile)
 		ASSERT(atn);
 		mirror_child_t *mc = &mm->mm_child[c];
 		mc->mc_vd = vd->vdev_child[atn->atn_disk];
-		mc->mc_offset = VDEV_ANYRAID_TOTAL_MAP_SIZE(vd->vdev_ashift) +
+		mc->mc_offset = VDEV_ANYRAID_START_OFFSET(vd->vdev_ashift) +
 		    atn->atn_offset * rsize + zio->io_offset % rsize;
 		ASSERT3U(mc->mc_offset, <, mc->mc_vd->vdev_psize -
 		    VDEV_LABEL_END_SIZE);
@@ -1086,7 +1095,7 @@ vdev_anyraid_io_start(zio_t *zio)
 	vdev_t *cvd = vd->vdev_child[atn->atn_disk];
 	uint64_t child_offset = atn->atn_offset * rsize +
 	    zio->io_offset % rsize;
-	child_offset += VDEV_ANYRAID_TOTAL_MAP_SIZE(vd->vdev_ashift);
+	child_offset += VDEV_ANYRAID_START_OFFSET(vd->vdev_ashift);
 
 	anyraid_map_t *mm = kmem_alloc(sizeof (*mm), KM_SLEEP);
 	mm->am_abd = abd_get_offset(zio->io_abd, 0);
@@ -1203,7 +1212,7 @@ vdev_anyraid_xlate(vdev_t *cvd, const zfs_range_seg64_t *logical_rs,
 	uint64_t child_offset =
atn->atn_offset * rsize + logical_rs->rs_start % rsize; - child_offset += VDEV_ANYRAID_TOTAL_MAP_SIZE(anyraidvd->vdev_ashift); + child_offset += VDEV_ANYRAID_START_OFFSET(anyraidvd->vdev_ashift); uint64_t size = logical_rs->rs_end - logical_rs->rs_start; physical_rs->rs_start = child_offset; @@ -1294,11 +1303,9 @@ vdev_anyraid_write_map_sync(vdev_t *vd, zio_t *pio, uint64_t txg, spa_t *spa = vd->vdev_spa; vdev_anyraid_t *var = anyraidvd->vdev_tsd; uint32_t header_size = VDEV_ANYRAID_MAP_HEADER_SIZE(vd->vdev_ashift); - uint32_t full_size = VDEV_ANYRAID_SINGLE_MAP_SIZE(vd->vdev_ashift); uint32_t nvl_bytes = VDEV_ANYRAID_NVL_BYTES(vd->vdev_ashift); uint8_t update_target = txg % VDEV_ANYRAID_MAP_COPIES; - uint64_t base_offset = VDEV_LABEL_START_SIZE + - update_target * full_size; + uint64_t base_offset = vdev_anyraid_header_offset(vd, update_target); abd_t *header_abd = abd_alloc_linear(header_size, B_TRUE); From 37c80d5a0632ced6af6e285da021d5215c1669a0 Mon Sep 17 00:00:00 2001 From: Paul Dagnelie Date: Wed, 29 Oct 2025 11:57:11 -0700 Subject: [PATCH 18/21] fix zdb arg and checkpoint test Signed-off-by: Paul Dagnelie --- cmd/zdb/zdb.c | 2 +- .../zfs-tests/tests/functional/anyraid/anyraid_checkpoint.ksh | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index 7507bbb02ffc..f8a3be39062d 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -10013,7 +10013,7 @@ main(int argc, char **argv) ARG_BLOCK_BIN_MODE}, {"class", required_argument, NULL, ARG_BLOCK_CLASSES}, - {"anyraid-map", required_argument, NULL, + {"anyraid-map", no_argument, NULL, ARG_ANYRAID_MAP}, {0, 0, 0, 0} }; diff --git a/tests/zfs-tests/tests/functional/anyraid/anyraid_checkpoint.ksh b/tests/zfs-tests/tests/functional/anyraid/anyraid_checkpoint.ksh index 5cafab5b3f06..76a68a9e4ba9 100755 --- a/tests/zfs-tests/tests/functional/anyraid/anyraid_checkpoint.ksh +++ b/tests/zfs-tests/tests/functional/anyraid/anyraid_checkpoint.ksh @@ -50,11 +50,12 @@ log_onexit cleanup log_must create_pool $TESTPOOL anymirror1 $DISKS log_assert "Anyraid works correctly with checkpoints" +log_must zdb --anyraid-map $TESTPOOL map=$(zdb --anyraid-map $TESTPOOL) log_must zpool checkpoint $TESTPOOL -log_must dd if=/dev/urandom of=/$TESTPOOL/f1 bs=1M count=2k +log_must file_write -o create -f /$TESTPOOL/f1 -b 1048576 -c 2048 -d R log_must zpool export $TESTPOOL log_must zpool import --rewind-to-checkpoint $TESTPOOL From b774f24206a66ae73ebf4edff2ecfb31513f9b3f Mon Sep 17 00:00:00 2001 From: Paul Dagnelie Date: Wed, 29 Oct 2025 12:11:58 -0700 Subject: [PATCH 19/21] punch holes in loopbacks Signed-off-by: Paul Dagnelie --- tests/zfs-tests/include/libtest.shlib | 25 +++++++++++++++++++ .../functional/anyraid/anyraid_common.kshlib | 3 +-- .../tests/functional/direct/dio.kshlib | 13 ---------- 3 files changed, 26 insertions(+), 15 deletions(-) diff --git a/tests/zfs-tests/include/libtest.shlib b/tests/zfs-tests/include/libtest.shlib index 85fd1869e2c7..2e4ff6890a4d 100644 --- a/tests/zfs-tests/include/libtest.shlib +++ b/tests/zfs-tests/include/libtest.shlib @@ -789,6 +789,23 @@ function assert (($@)) || log_fail "$@" } +function get_file_size +{ + typeset filename="$1" + + if is_linux; then + if [ -b "$filename" ] ; then + filesize=$(blockdev --getsize64 $filename) + else + filesize=$(stat -c %s $filename) + fi + else + filesize=$(stat -s $filename | awk '{print $8}' | grep -o '[0-9]\+') + fi + + echo $filesize +} + # # Function to format partition size of a disk # Given a disk cxtxdx reduces all partitions @@ -1599,6 
+1616,14 @@ function create_pool #pool devs_list if is_global_zone ; then [[ -d /$pool ]] && rm -rf /$pool + + for vdev in "$@" ; do + if [[ "$vdev" =~ "loop" ]] ; then + # If the device is a loopback, remove previously + # allocated data. + punch_hole 0 $(get_file_size /dev/$vdev) /dev/$vdev + fi + done log_must zpool create -f $pool $@ fi diff --git a/tests/zfs-tests/tests/functional/anyraid/anyraid_common.kshlib b/tests/zfs-tests/tests/functional/anyraid/anyraid_common.kshlib index 47d004d55a94..1b4f7d15451c 100644 --- a/tests/zfs-tests/tests/functional/anyraid/anyraid_common.kshlib +++ b/tests/zfs-tests/tests/functional/anyraid/anyraid_common.kshlib @@ -46,8 +46,7 @@ function wipe_some_disks_and_verify_content_is_still_okay done for disk in $@; do - log_must dd if=/dev/zero of=$disk seek=8 bs=$DD_BLOCK \ - count=$(( DD_COUNT - 128 )) conv=notrunc + log_must punch_hole $((DD_BLOCK * 8)) $((DD_BLOCK * (DD_COUNT - 128))) $disk done # diff --git a/tests/zfs-tests/tests/functional/direct/dio.kshlib b/tests/zfs-tests/tests/functional/direct/dio.kshlib index 33564ccc71e6..c8a6e5c00ac6 100644 --- a/tests/zfs-tests/tests/functional/direct/dio.kshlib +++ b/tests/zfs-tests/tests/functional/direct/dio.kshlib @@ -261,19 +261,6 @@ function check_read # pool file bs count skip flags buf_rd dio_rd fi } -function get_file_size -{ - typeset filename="$1" - - if is_linux; then - filesize=$(stat -c %s $filename) - else - filesize=$(stat -s $filename | awk '{print $8}' | grep -o '[0-9]\+') - fi - - echo $filesize -} - function do_truncate_reduce { typeset filename=$1 From 33ba0e35ebe94e981495a5380dae7b6ec8f11e4d Mon Sep 17 00:00:00 2001 From: Paul Dagnelie Date: Mon, 3 Nov 2025 15:45:51 -0800 Subject: [PATCH 20/21] Add assertion to satisfy codeql Signed-off-by: Paul Dagnelie --- cmd/zdb/zdb.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index f8a3be39062d..c11dc5f1aa0f 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -9452,7 +9452,9 @@ print_separator_line(int cols, int colwidth, boolean_t *print, boolean_t *final) // Create a buffer with the cell separator to make later code simpler. while (len < colwidth) { len++; - off += snprintf(buf + off, sizeof (buf) - off, "─"); + int n = snprintf(buf + off, sizeof (buf) - off, "─"); + ASSERT(n > 0 && n < sizeof (buf) - off); + off += n; } for (int i = 0; i < cols; i++) { From 79e47fab92a17bfcf11856a3b2fbf8a19fc3b2d5 Mon Sep 17 00:00:00 2001 From: Paul Dagnelie Date: Tue, 11 Nov 2025 12:11:37 -0800 Subject: [PATCH 21/21] Fix test bugs Signed-off-by: Paul Dagnelie --- tests/zfs-tests/include/libtest.shlib | 15 ++++++++------- .../cli_root/zpool_add/zpool_add_001_pos.ksh | 1 + .../tests/functional/trim/autotrim_config.ksh | 8 +++++++- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/tests/zfs-tests/include/libtest.shlib b/tests/zfs-tests/include/libtest.shlib index 2e4ff6890a4d..9683a247014f 100644 --- a/tests/zfs-tests/include/libtest.shlib +++ b/tests/zfs-tests/include/libtest.shlib @@ -1617,13 +1617,14 @@ function create_pool #pool devs_list if is_global_zone ; then [[ -d /$pool ]] && rm -rf /$pool - for vdev in "$@" ; do - if [[ "$vdev" =~ "loop" ]] ; then - # If the device is a loopback, remove previously - # allocated data. - punch_hole 0 $(get_file_size /dev/$vdev) /dev/$vdev - fi - done + for internal_vd in "$@" ; do + if [[ "$internal_vd" =~ "loop" ]] ; then + # If the device is a loopback, remove previously + # allocated data. 
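+				# Punching from offset 0 to the device size
+				# deallocates the entire backing range, so no
+				# data from an earlier test survives on the
+				# loop device.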
+ punch_hole 0 $(get_file_size /dev/$internal_vd) \ + /dev/$internal_vd + fi + done log_must zpool create -f $pool $@ fi diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_001_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_001_pos.ksh index b082a57f114e..8d6107ee3a86 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_001_pos.ksh @@ -68,6 +68,7 @@ raidzdevs="\"${DISK0} ${DISK1}\"" anyraiddevs="\"${extradisks}\"" draiddevs="\"${DISK0} ${DISK1} ${DISK2}\"" + typeset -i i=0 typeset vdev eval set -A poolarray $pooldevs diff --git a/tests/zfs-tests/tests/functional/trim/autotrim_config.ksh b/tests/zfs-tests/tests/functional/trim/autotrim_config.ksh index 03eb17d92bec..d012525ee6f6 100755 --- a/tests/zfs-tests/tests/functional/trim/autotrim_config.ksh +++ b/tests/zfs-tests/tests/functional/trim/autotrim_config.ksh @@ -70,6 +70,7 @@ log_must set_tunable64 VDEV_MIN_MS_COUNT 32 typeset VDEV_MAX_MB=$(( floor(4 * MINVDEVSIZE * 0.75 / 1024 / 1024) )) typeset VDEV_MIN_MB=$(( floor(4 * MINVDEVSIZE * 0.30 / 1024 / 1024) )) +typeset TXGS=64 for type in "" "mirror" "anymirror0" "anymirror1" "anymirror2" "anymirror3" "raidz2" "draid"; do @@ -79,6 +80,11 @@ for type in "" "mirror" "anymirror0" "anymirror1" "anymirror2" "anymirror3" "rai VDEVS="$TRIM_VDEV1 $TRIM_VDEV2" elif [[ "$type" =~ "anymirror" ]]; then VDEVS="$TRIM_VDEV1 $TRIM_VDEV2 $TRIM_VDEV3 $TRIM_VDEV4" + + # The per-vdev utilization is lower due to the capacity + # used by the tile map + VDEV_MAX_MB=$(( floor(4 * MINVDEVSIZE * 0.50 / 1024 / 1024) )) + TXGS=128 elif [[ "$type" = "raidz2" ]]; then VDEVS="$TRIM_VDEV1 $TRIM_VDEV2 $TRIM_VDEV3" elif [[ "$type" = "draid" ]]; then @@ -103,7 +109,7 @@ for type in "" "mirror" "anymirror0" "anymirror1" "anymirror2" "anymirror3" "rai # Remove the file, wait for trim, verify the vdevs are now sparse. log_must rm /$TESTPOOL/file - wait_trim_io $TESTPOOL "ind" 64 + wait_trim_io $TESTPOOL "ind" $TXGS verify_vdevs "-le" "$VDEV_MIN_MB" $VDEVS log_must zpool destroy $TESTPOOL
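
As a worked check of the bounds above, assuming the suite's default MINVDEVSIZE of 256 MiB: the generic ceiling is floor(4 * 256 MiB * 0.75 / 1 MiB) = 768 MiB, the anymirror ceiling drops to floor(4 * 256 MiB * 0.50 / 1 MiB) = 512 MiB to account for the tile map's footprint, and the anymirror configurations wait up to 128 TXGs for trim I/O instead of 64.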