|
23 | 23 | * Copyright (c) 2011, 2022 by Delphix. All rights reserved.
|
24 | 24 | * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
|
25 | 25 | * Copyright (c) 2017, Intel Corporation.
|
26 |
| - * Copyright (c) 2019, 2023, 2024, Klara Inc. |
| 26 | + * Copyright (c) 2019, 2023, 2024, 2025, Klara, Inc. |
27 | 27 | * Copyright (c) 2019, Allan Jude
|
28 | 28 | * Copyright (c) 2021, Datto, Inc.
|
29 | 29 | * Copyright (c) 2021, 2024 by George Melikov. All rights reserved.
|
@@ -2537,13 +2537,29 @@ zio_reexecute(void *arg)
|
2537 | 2537 | pio->io_state[ZIO_WAIT_READY] = (pio->io_stage >= ZIO_STAGE_READY) ||
|
2538 | 2538 | (pio->io_pipeline & ZIO_STAGE_READY) == 0;
|
2539 | 2539 | pio->io_state[ZIO_WAIT_DONE] = (pio->io_stage >= ZIO_STAGE_DONE);
|
| 2540 | + |
| 2541 | + /* |
| 2542 | + * It's possible for a failed ZIO to be a descendant of more than one |
| 2543 | + * ZIO tree. When reexecuting it, we have to be sure to add its wait |
| 2544 | + * states to all parent wait counts. |
| 2545 | + * |
| 2546 | + * Those parents, in turn, may have other children that are currently |
| 2547 | + * active, usually because they've already been reexecuted after |
| 2548 | + * resuming. Those children may be executing and may call |
| 2549 | + * zio_notify_parent() at the same time as we're updating our parent's |
| 2550 | + * counts. To avoid races while updating the counts, we take |
| 2551 | + * gio->io_lock before each update. |
| 2552 | + */ |
2540 | 2553 | zio_link_t *zl = NULL;
|
2541 | 2554 | while ((gio = zio_walk_parents(pio, &zl)) != NULL) {
|
| 2555 | + mutex_enter(&gio->io_lock); |
2542 | 2556 | for (int w = 0; w < ZIO_WAIT_TYPES; w++) {
|
2543 | 2557 | gio->io_children[pio->io_child_type][w] +=
|
2544 | 2558 | !pio->io_state[w];
|
2545 | 2559 | }
|
| 2560 | + mutex_exit(&gio->io_lock); |
2546 | 2561 | }
|
| 2562 | + |
2547 | 2563 | for (int c = 0; c < ZIO_CHILD_TYPES; c++)
|
2548 | 2564 | pio->io_child_error[c] = 0;
|
2549 | 2565 |
|
|
0 commit comments