Skip to content

Commit f8a112d

Browse files
ixhamzaFedorUporovVstack
authored andcommitted
zed: Ensure spare activation after kernel-initiated device removal
In addition to hotplug events, the kernel may also mark a failing vdev as REMOVED. This was observed in a customer report and reproduced by forcing the NVMe host driver to disable the device after a failed reset due to command timeout. In such cases, the spare was not activated because the device had already transitioned to a REMOVED state before zed processed the event. To address this, explicitly attempt hot spare activation when the kernel marks a device as REMOVED. Reviewed-by: Alexander Motin <[email protected]> Reviewed-by: Tony Hutter <[email protected]> Signed-off-by: Ameer Hamza <[email protected]> Closes openzfs#17187
1 parent aa87633 commit f8a112d

File tree

5 files changed

+46
-15
lines changed

5 files changed

+46
-15
lines changed

cmd/zed/agents/zfs_retire.c

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -404,6 +404,7 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
404404
(state == VDEV_STATE_REMOVED || state == VDEV_STATE_FAULTED))) {
405405
const char *devtype;
406406
char *devname;
407+
boolean_t skip_removal = B_FALSE;
407408

408409
if (nvlist_lookup_string(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE,
409410
&devtype) == 0) {
@@ -441,18 +442,28 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
441442
nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_VDEV_STATS,
442443
(uint64_t **)&vs, &c);
443444

445+
if (vs->vs_state == VDEV_STATE_OFFLINE)
446+
return;
447+
444448
/*
445449
* If state removed is requested for already removed vdev,
446450
* its a loopback event from spa_async_remove(). Just
447451
* ignore it.
448452
*/
449-
if ((vs->vs_state == VDEV_STATE_REMOVED && state ==
450-
VDEV_STATE_REMOVED) || vs->vs_state == VDEV_STATE_OFFLINE)
451-
return;
453+
if ((vs->vs_state == VDEV_STATE_REMOVED &&
454+
state == VDEV_STATE_REMOVED)) {
455+
if (strcmp(class, "resource.fs.zfs.removed") == 0 &&
456+
nvlist_exists(nvl, "by_kernel")) {
457+
skip_removal = B_TRUE;
458+
} else {
459+
return;
460+
}
461+
}
452462

453463
/* Remove the vdev since device is unplugged */
454464
int remove_status = 0;
455-
if (l2arc || (strcmp(class, "resource.fs.zfs.removed") == 0)) {
465+
if (!skip_removal && (l2arc ||
466+
(strcmp(class, "resource.fs.zfs.removed") == 0))) {
456467
remove_status = zpool_vdev_remove_wanted(zhp, devname);
457468
fmd_hdl_debug(hdl, "zpool_vdev_remove_wanted '%s'"
458469
", err:%d", devname, libzfs_errno(zhdl));

include/sys/spa.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -784,6 +784,7 @@ extern int bpobj_enqueue_free_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx);
784784
#define SPA_ASYNC_L2CACHE_TRIM 0x1000
785785
#define SPA_ASYNC_REBUILD_DONE 0x2000
786786
#define SPA_ASYNC_DETACH_SPARE 0x4000
787+
#define SPA_ASYNC_REMOVE_BY_USER 0x8000
787788

788789
/* device manipulation */
789790
extern int spa_vdev_add(spa_t *spa, nvlist_t *nvroot, boolean_t ashift_check);
@@ -1179,7 +1180,7 @@ extern void zfs_ereport_taskq_fini(void);
11791180
extern void zfs_ereport_clear(spa_t *spa, vdev_t *vd);
11801181
extern nvlist_t *zfs_event_create(spa_t *spa, vdev_t *vd, const char *type,
11811182
const char *name, nvlist_t *aux);
1182-
extern void zfs_post_remove(spa_t *spa, vdev_t *vd);
1183+
extern void zfs_post_remove(spa_t *spa, vdev_t *vd, boolean_t by_kernel);
11831184
extern void zfs_post_state_change(spa_t *spa, vdev_t *vd, uint64_t laststate);
11841185
extern void zfs_post_autoreplace(spa_t *spa, vdev_t *vd);
11851186
extern uint64_t spa_approx_errlog_size(spa_t *spa);

module/zfs/spa.c

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8921,7 +8921,7 @@ spa_scan_range(spa_t *spa, pool_scan_func_t func, uint64_t txgstart,
89218921
*/
89228922

89238923
static void
8924-
spa_async_remove(spa_t *spa, vdev_t *vd)
8924+
spa_async_remove(spa_t *spa, vdev_t *vd, boolean_t by_kernel)
89258925
{
89268926
if (vd->vdev_remove_wanted) {
89278927
vd->vdev_remove_wanted = B_FALSE;
@@ -8941,11 +8941,11 @@ spa_async_remove(spa_t *spa, vdev_t *vd)
89418941
vdev_state_dirty(vd->vdev_top);
89428942

89438943
/* Tell userspace that the vdev is gone. */
8944-
zfs_post_remove(spa, vd);
8944+
zfs_post_remove(spa, vd, by_kernel);
89458945
}
89468946

89478947
for (int c = 0; c < vd->vdev_children; c++)
8948-
spa_async_remove(spa, vd->vdev_child[c]);
8948+
spa_async_remove(spa, vd->vdev_child[c], by_kernel);
89498949
}
89508950

89518951
static void
@@ -9039,13 +9039,18 @@ spa_async_thread(void *arg)
90399039
/*
90409040
* See if any devices need to be marked REMOVED.
90419041
*/
9042-
if (tasks & SPA_ASYNC_REMOVE) {
9042+
if (tasks & (SPA_ASYNC_REMOVE | SPA_ASYNC_REMOVE_BY_USER)) {
9043+
boolean_t by_kernel = B_TRUE;
9044+
if (tasks & SPA_ASYNC_REMOVE_BY_USER)
9045+
by_kernel = B_FALSE;
90439046
spa_vdev_state_enter(spa, SCL_NONE);
9044-
spa_async_remove(spa, spa->spa_root_vdev);
9047+
spa_async_remove(spa, spa->spa_root_vdev, by_kernel);
90459048
for (int i = 0; i < spa->spa_l2cache.sav_count; i++)
9046-
spa_async_remove(spa, spa->spa_l2cache.sav_vdevs[i]);
9049+
spa_async_remove(spa, spa->spa_l2cache.sav_vdevs[i],
9050+
by_kernel);
90479051
for (int i = 0; i < spa->spa_spares.sav_count; i++)
9048-
spa_async_remove(spa, spa->spa_spares.sav_vdevs[i]);
9052+
spa_async_remove(spa, spa->spa_spares.sav_vdevs[i],
9053+
by_kernel);
90499054
(void) spa_vdev_state_exit(spa, NULL, 0);
90509055
}
90519056

module/zfs/vdev.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4271,7 +4271,7 @@ vdev_remove_wanted(spa_t *spa, uint64_t guid)
42714271
return (spa_vdev_state_exit(spa, NULL, SET_ERROR(EEXIST)));
42724272

42734273
vd->vdev_remove_wanted = B_TRUE;
4274-
spa_async_request(spa, SPA_ASYNC_REMOVE);
4274+
spa_async_request(spa, SPA_ASYNC_REMOVE_BY_USER);
42754275

42764276
return (spa_vdev_state_exit(spa, vd, 0));
42774277
}

module/zfs/zfs_fm.c

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1433,9 +1433,23 @@ zfs_post_common(spa_t *spa, vdev_t *vd, const char *type, const char *name,
14331433
* removal.
14341434
*/
14351435
void
1436-
zfs_post_remove(spa_t *spa, vdev_t *vd)
1436+
zfs_post_remove(spa_t *spa, vdev_t *vd, boolean_t by_kernel)
14371437
{
1438-
zfs_post_common(spa, vd, FM_RSRC_CLASS, FM_RESOURCE_REMOVED, NULL);
1438+
nvlist_t *aux = NULL;
1439+
1440+
if (by_kernel) {
1441+
/*
1442+
* Add optional supplemental keys to payload
1443+
*/
1444+
aux = fm_nvlist_create(NULL);
1445+
if (aux)
1446+
fnvlist_add_boolean(aux, "by_kernel");
1447+
}
1448+
1449+
zfs_post_common(spa, vd, FM_RSRC_CLASS, FM_RESOURCE_REMOVED, aux);
1450+
1451+
if (by_kernel && aux)
1452+
fm_nvlist_destroy(aux, FM_NVA_FREE);
14391453
}
14401454

14411455
/*

0 commit comments

Comments
 (0)