Skip to content

Commit c6b77e2

Browse files
Eric Wheeler authored and kakra committed
bcache: introduce bcache sysfs entries for ioprio-based bypass/writeback hints
Add sysfs entries to support hinting for bypass/writeback based on the ioprio assigned to the bio. If the bio has no ioprio assigned, use current's io-context ioprio for cache writeback or bypass (configured per-process with `ionice`).

Having idle IOs bypass the cache can increase performance elsewhere, since you probably don't care about their performance. In addition, this prevents idle IOs from promoting into (polluting) your cache and evicting blocks that are more important elsewhere.

If you really need the performance at the expense of SSD wearout, then configure ioprio_writeback and set your `ionice` appropriately.

For example:
    echo 2,7 > /sys/block/bcache0/bcache/ioprio_bypass
    echo 2,0 > /sys/block/bcache0/bcache/ioprio_writeback

See the documentation commit for details.

Signed-off-by: Eric Wheeler <[email protected]>
Acked-by: Kent Overstreet <[email protected]>
Tested-by: Kai Krakow <[email protected]>
Cc: [email protected]
1 parent d6591ea commit c6b77e2

File tree

5 files changed

+131
-0
lines changed

5 files changed

+131
-0
lines changed

drivers/md/bcache/bcache.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -392,6 +392,9 @@ struct cached_dev {
392392
unsigned int error_limit;
393393
unsigned int offline_seconds;
394394

395+
unsigned short ioprio_writeback;
396+
unsigned short ioprio_bypass;
397+
395398
char backing_dev_name[BDEVNAME_SIZE];
396399
};
397400

drivers/md/bcache/request.c

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -379,6 +379,8 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
379379
unsigned int sectors, congested;
380380
struct task_struct *task = current;
381381
struct io *i;
382+
struct io_context *ioc;
383+
unsigned short ioprio;
382384

383385
if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) ||
384386
c->gc_stats.in_use > CUTOFF_CACHE_ADD ||
@@ -406,6 +408,28 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
406408
goto skip;
407409
}
408410

411+
/* If the ioprio already exists on the bio, use that. We assume that
412+
* the upper layer properly assigned the calling process's ioprio to
413+
* the bio being passed to bcache. Otherwise, use current's ioc. */
414+
ioprio = bio_prio(bio);
415+
if (!ioprio_valid(ioprio)) {
416+
ioc = get_task_io_context(current, GFP_NOIO, NUMA_NO_NODE);
417+
if (ioc) {
418+
if (ioprio_valid(ioc->ioprio))
419+
ioprio = ioc->ioprio;
420+
put_io_context(ioc);
421+
ioc = NULL;
422+
}
423+
}
424+
425+
/* If process ioprio is lower-or-equal to dc->ioprio_bypass, then
426+
* hint for bypass. Note that a lower-priority IO class+value
427+
* has a greater numeric value. */
428+
if (ioprio_valid(ioprio) && ioprio_valid(dc->ioprio_writeback)
429+
&& ioprio >= dc->ioprio_bypass) {
430+
goto skip;
431+
}
432+
409433
if (bio->bi_iter.bi_sector & (c->sb.block_size - 1) ||
410434
bio_sectors(bio) & (c->sb.block_size - 1)) {
411435
pr_debug("skipping unaligned io");

drivers/md/bcache/sysfs.c

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,9 @@ rw_attribute(copy_gc_enabled);
144144
rw_attribute(gc_after_writeback);
145145
rw_attribute(size);
146146

147+
rw_attribute(ioprio_writeback);
148+
rw_attribute(ioprio_bypass);
149+
147150
static ssize_t bch_snprint_string_list(char *buf,
148151
size_t size,
149152
const char * const list[],
@@ -272,6 +275,16 @@ SHOW(__bch_cached_dev)
272275
return strlen(buf);
273276
}
274277

278+
if (attr == &sysfs_ioprio_bypass)
279+
return snprintf(buf, PAGE_SIZE-1, "%d,%ld\n",
280+
IOPRIO_PRIO_CLASS(dc->ioprio_bypass),
281+
IOPRIO_PRIO_DATA(dc->ioprio_bypass));
282+
283+
if (attr == &sysfs_ioprio_writeback)
284+
return snprintf(buf, PAGE_SIZE-1, "%d,%ld\n",
285+
IOPRIO_PRIO_CLASS(dc->ioprio_writeback),
286+
IOPRIO_PRIO_DATA(dc->ioprio_writeback));
287+
275288
#undef var
276289
return 0;
277290
}
@@ -284,6 +297,10 @@ STORE(__cached_dev)
284297
ssize_t v;
285298
struct cache_set *c;
286299
struct kobj_uevent_env *env;
300+
unsigned ioprio_class = 0; /* invalid initial ioprio values */
301+
unsigned ioprio_level = IOPRIO_BE_NR;
302+
unsigned short *ioprio_hint = NULL;
303+
char *ioprio_type = NULL;
287304

288305
/* no user space access if system is rebooting */
289306
if (bcache_is_reboot)
@@ -430,6 +447,57 @@ STORE(__cached_dev)
430447
if (attr == &sysfs_stop)
431448
bcache_device_stop(&dc->disk);
432449

450+
/* ioprio hinting: we use ioprio_hint to reduce duplicate printk verbiage */
451+
if (attr == &sysfs_ioprio_writeback) {
452+
ioprio_hint = &dc->ioprio_writeback;
453+
ioprio_type = "writeback";
454+
}
455+
456+
if (attr == &sysfs_ioprio_bypass) {
457+
ioprio_hint = &dc->ioprio_bypass;
458+
ioprio_type = "bypass";
459+
}
460+
461+
if (ioprio_hint != NULL)
462+
{
463+
if (sscanf(buf, "%u,%u", &ioprio_class, &ioprio_level) != 2
464+
|| ioprio_class > IOPRIO_CLASS_IDLE
465+
|| ioprio_level >= IOPRIO_BE_NR) {
466+
pr_err("ioprio_%s invalid, expecting: (class,level) but parsed (%u,%u); ignored.",
467+
ioprio_type,
468+
ioprio_class, ioprio_level);
469+
return size;
470+
}
471+
472+
/* Use the maximum(/minimum) value in the class shift space to make integer
473+
comparison correct for ioprio_writeback(/ioprio_bypass) for IOPRIO_CLASS_IDLE.
474+
This is necessary because there are no ioprio levels for the idle class. */
475+
if (ioprio_class == IOPRIO_CLASS_IDLE) {
476+
if (ioprio_hint == &dc->ioprio_writeback)
477+
ioprio_level = IOPRIO_PRIO_MASK;
478+
else
479+
/* Same, but 0 for bypass (inverted vs. writeback) */
480+
ioprio_level = 0;
481+
}
482+
483+
*ioprio_hint = IOPRIO_PRIO_VALUE(ioprio_class, ioprio_level);
484+
485+
if (!ioprio_valid(*ioprio_hint))
486+
pr_info("disabled ioprio_%s hints.", ioprio_type);
487+
else
488+
pr_info("set hint for cache %s with priority %s: (class,level) = (%u,%u)",
489+
ioprio_type,
490+
( ioprio_hint == &dc->ioprio_writeback ? "at-or-above" : "at-or-below" ),
491+
ioprio_class, ioprio_level);
492+
493+
if (ioprio_valid(dc->ioprio_writeback)
494+
&& ioprio_valid(dc->ioprio_bypass)
495+
&& dc->ioprio_writeback >= dc->ioprio_bypass)
496+
pr_warning(
497+
"warning: ioprio_writeback hint is neither disabled nor higher priority than the bypass hint; "
498+
"will always writeback!");
499+
}
500+
433501
return size;
434502
}
435503

@@ -517,6 +585,8 @@ static struct attribute *bch_cached_dev_files[] = {
517585
#endif
518586
&sysfs_backing_dev_name,
519587
&sysfs_backing_dev_uuid,
588+
&sysfs_ioprio_bypass,
589+
&sysfs_ioprio_writeback,
520590
NULL
521591
};
522592
KTYPE(bch_cached_dev);

drivers/md/bcache/writeback.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -821,6 +821,16 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc)
821821
dc->writeback_rate_p_term_inverse = 40;
822822
dc->writeback_rate_i_term_inverse = 10000;
823823

824+
/*
825+
* These defaults provide the best SSD life by enabling bypass
826+
* for priorities at-or-below BE-7. This also provides better
827+
* performance (cache hits) by preventing (near-)idle processes from
828+
* polluting the cache working set. Only set ioprio_writeback if
829+
* you really need it: it will wear out your SSD sooner.
830+
*/
831+
dc->ioprio_writeback = IOPRIO_PRIO_VALUE(0, 0);
832+
dc->ioprio_bypass = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, (IOPRIO_BE_NR-1));
833+
824834
WARN_ON(test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags));
825835
INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate);
826836
}

drivers/md/bcache/writeback.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,8 @@ static inline bool should_writeback(struct cached_dev *dc, struct bio *bio,
6565
unsigned int cache_mode, bool would_skip)
6666
{
6767
unsigned int in_use = dc->disk.c->gc_stats.in_use;
68+
struct io_context *ioc;
69+
unsigned short ioprio;
6870

6971
if (cache_mode != CACHE_MODE_WRITEBACK ||
7072
test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) ||
@@ -82,6 +84,28 @@ static inline bool should_writeback(struct cached_dev *dc, struct bio *bio,
8284
if (would_skip)
8385
return false;
8486

87+
/* If the ioprio already exists on the bio, use that. We assume that
88+
* the upper layer properly assigned the calling process's ioprio to
89+
* the bio being passed to bcache. Otherwise, use current's ioc. */
90+
ioprio = bio_prio(bio);
91+
if (!ioprio_valid(ioprio)) {
92+
ioc = get_task_io_context(current, GFP_NOIO, NUMA_NO_NODE);
93+
if (ioc) {
94+
if (ioprio_valid(ioc->ioprio))
95+
ioprio = ioc->ioprio;
96+
put_io_context(ioc);
97+
ioc = NULL;
98+
}
99+
}
100+
101+
/* If process ioprio is higher-or-equal to dc->ioprio_writeback, then
102+
* hint for writeback. Note that a higher-priority IO class+value
103+
* has a lesser numeric value. */
104+
if (ioprio_valid(ioprio) && ioprio_valid(dc->ioprio_writeback)
105+
&& ioprio <= dc->ioprio_writeback) {
106+
return true;
107+
}
108+
85109
return (op_is_sync(bio->bi_opf) ||
86110
bio->bi_opf & (REQ_META|REQ_PRIO) ||
87111
in_use <= bch_cutoff_writeback);

0 commit comments

Comments
 (0)