Skip to content

Commit 0ea44e5

Browse files
authored
Fix deduplication of overridden blocks
Implementation of DDT pruning introduced verification of DVAs in a block pointer during ddt_lookup() to not by mistake free previous pruned incarnation of the entry. But when writing a new block in zio_ddt_write() we might have the DVAs only from override pointer, which may never have "D" flag to be confused with pruned DDT entry, and we'll abandon those DVAs if we find a matching entry in DDT. This fixes deduplication for blocks written via dmu_sync() for purposes of indirect ZIL write records, that I have tested. And I suspect it might actually allow deduplication for Direct I/O, even though in an odd way -- first write block directly and then delete it later during TXG commit if found duplicate, which part I haven't tested. Reviewed-by: Tony Hutter <[email protected]> Signed-off-by: Alexander Motin <[email protected]> Sponsored by: iXsystems, Inc. Closes #17120
1 parent 09f4dd0 commit 0ea44e5

File tree

4 files changed

+19
-11
lines changed

4 files changed

+19
-11
lines changed

cmd/zdb/zdb.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5864,7 +5864,7 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
58645864
* Find the block. This will create the entry in memory, but
58655865
* we'll know if that happened by its refcount.
58665866
*/
5867-
ddt_entry_t *dde = ddt_lookup(ddt, bp);
5867+
ddt_entry_t *dde = ddt_lookup(ddt, bp, B_TRUE);
58685868

58695869
/*
58705870
* ddt_lookup() can return NULL if this block didn't exist

include/sys/ddt.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -378,7 +378,8 @@ extern void ddt_enter(ddt_t *ddt);
378378
extern void ddt_exit(ddt_t *ddt);
379379
extern void ddt_init(void);
380380
extern void ddt_fini(void);
381-
extern ddt_entry_t *ddt_lookup(ddt_t *ddt, const blkptr_t *bp);
381+
extern ddt_entry_t *ddt_lookup(ddt_t *ddt, const blkptr_t *bp,
382+
boolean_t verify);
382383
extern void ddt_remove(ddt_t *ddt, ddt_entry_t *dde);
383384
extern void ddt_prefetch(spa_t *spa, const blkptr_t *bp);
384385
extern void ddt_prefetch_all(spa_t *spa);

module/zfs/ddt.c

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1106,7 +1106,7 @@ ddt_entry_lookup_is_valid(ddt_t *ddt, const blkptr_t *bp, ddt_entry_t *dde)
11061106
}
11071107

11081108
ddt_entry_t *
1109-
ddt_lookup(ddt_t *ddt, const blkptr_t *bp)
1109+
ddt_lookup(ddt_t *ddt, const blkptr_t *bp, boolean_t verify)
11101110
{
11111111
spa_t *spa = ddt->ddt_spa;
11121112
ddt_key_t search;
@@ -1141,7 +1141,7 @@ ddt_lookup(ddt_t *ddt, const blkptr_t *bp)
11411141
/* If it's already loaded, we can just return it. */
11421142
DDT_KSTAT_BUMP(ddt, dds_lookup_live_hit);
11431143
if (dde->dde_flags & DDE_FLAG_LOADED) {
1144-
if (ddt_entry_lookup_is_valid(ddt, bp, dde))
1144+
if (!verify || ddt_entry_lookup_is_valid(ddt, bp, dde))
11451145
return (dde);
11461146
return (NULL);
11471147
}
@@ -1165,7 +1165,7 @@ ddt_lookup(ddt_t *ddt, const blkptr_t *bp)
11651165
DDT_KSTAT_BUMP(ddt, dds_lookup_existing);
11661166

11671167
/* Make sure the loaded entry matches the BP */
1168-
if (ddt_entry_lookup_is_valid(ddt, bp, dde))
1168+
if (!verify || ddt_entry_lookup_is_valid(ddt, bp, dde))
11691169
return (dde);
11701170
return (NULL);
11711171
} else
@@ -1194,7 +1194,8 @@ ddt_lookup(ddt_t *ddt, const blkptr_t *bp)
11941194
memcpy(dde->dde_phys, &ddlwe.ddlwe_phys,
11951195
DDT_PHYS_SIZE(ddt));
11961196
/* Whatever we found isn't valid for this BP, eject */
1197-
if (!ddt_entry_lookup_is_valid(ddt, bp, dde)) {
1197+
if (verify &&
1198+
!ddt_entry_lookup_is_valid(ddt, bp, dde)) {
11981199
avl_remove(&ddt->ddt_tree, dde);
11991200
ddt_free(ddt, dde);
12001201
return (NULL);
@@ -1276,7 +1277,7 @@ ddt_lookup(ddt_t *ddt, const blkptr_t *bp)
12761277
* worth the effort to deal with without taking an entry
12771278
* update.
12781279
*/
1279-
valid = ddt_entry_lookup_is_valid(ddt, bp, dde);
1280+
valid = !verify || ddt_entry_lookup_is_valid(ddt, bp, dde);
12801281
if (!valid && dde->dde_waiters == 0) {
12811282
avl_remove(&ddt->ddt_tree, dde);
12821283
ddt_free(ddt, dde);
@@ -2469,7 +2470,7 @@ ddt_addref(spa_t *spa, const blkptr_t *bp)
24692470
ddt = ddt_select(spa, bp);
24702471
ddt_enter(ddt);
24712472

2472-
dde = ddt_lookup(ddt, bp);
2473+
dde = ddt_lookup(ddt, bp, B_TRUE);
24732474

24742475
/* Can be NULL if the entry for this block was pruned. */
24752476
if (dde == NULL) {
@@ -2551,7 +2552,7 @@ prune_candidates_sync(void *arg, dmu_tx_t *tx)
25512552
ddt_bp_create(ddt->ddt_checksum, &dpe->dpe_key,
25522553
dpe->dpe_phys, DDT_PHYS_FLAT, &blk);
25532554

2554-
ddt_entry_t *dde = ddt_lookup(ddt, &blk);
2555+
ddt_entry_t *dde = ddt_lookup(ddt, &blk, B_TRUE);
25552556
if (dde != NULL && !(dde->dde_flags & DDE_FLAG_LOGGED)) {
25562557
ASSERT(dde->dde_flags & DDE_FLAG_LOADED);
25572558
/*

module/zfs/zio.c

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3717,7 +3717,13 @@ zio_ddt_write(zio_t *zio)
37173717
ASSERT3B(zio->io_prop.zp_direct_write, ==, B_FALSE);
37183718

37193719
ddt_enter(ddt);
3720-
dde = ddt_lookup(ddt, bp);
3720+
/*
3721+
* Search DDT for matching entry. Skip DVAs verification here, since
3722+
* they can go only from override, and once we get here the override
3723+
* pointer can't have "D" flag to be confused with pruned DDT entries.
3724+
*/
3725+
IMPLY(zio->io_bp_override, !BP_GET_DEDUP(zio->io_bp_override));
3726+
dde = ddt_lookup(ddt, bp, B_FALSE);
37213727
if (dde == NULL) {
37223728
/* DDT size is over its quota so no new entries */
37233729
zp->zp_dedup = B_FALSE;
@@ -3993,7 +3999,7 @@ zio_ddt_free(zio_t *zio)
39933999
ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL);
39944000

39954001
ddt_enter(ddt);
3996-
freedde = dde = ddt_lookup(ddt, bp);
4002+
freedde = dde = ddt_lookup(ddt, bp, B_TRUE);
39974003
if (dde) {
39984004
ddt_phys_variant_t v = ddt_phys_select(ddt, dde, bp);
39994005
if (v != DDT_PHYS_NONE)

0 commit comments

Comments
 (0)