Skip to content

Commit 0e7ab7e

Browse files
Dave Chinner authored and
djwong committed
xfs: Throttle commits on delayed background CIL push
In certain situations the background CIL push can be indefinitely delayed. While we have workarounds for the obvious cases now, they don't solve the underlying issue. The issue is that there is no upper limit on the CIL where we will either force or wait for a background push to start, hence allowing the CIL to grow without bound until it consumes all log space.

To fix this, add a new wait queue to the CIL which allows background pushes to wait for the CIL context to be switched out. This happens when the push starts, so it will allow us to block incoming transaction commit completion until the push has started. This will only affect processes that are running modifications, and only when the CIL threshold has been significantly overrun.

This has no apparent impact on performance, and doesn't even trigger until over 45 million inodes had been created in a 16-way fsmark test on a 2GB log. That was limiting at 64MB of log space used, so the active CIL size is only about 3% of the total log in that case. The concurrent removal of those files did not trigger the background sleep at all.

Signed-off-by: Dave Chinner <[email protected]>
Reviewed-by: Allison Collins <[email protected]>
Reviewed-by: Darrick J. Wong <[email protected]>
Signed-off-by: Darrick J. Wong <[email protected]>
1 parent 108a423 commit 0e7ab7e

File tree

3 files changed

+58
-4
lines changed

3 files changed

+58
-4
lines changed

fs/xfs/xfs_log_cil.c

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -668,6 +668,11 @@ xlog_cil_push_work(
668668
push_seq = cil->xc_push_seq;
669669
ASSERT(push_seq <= ctx->sequence);
670670

671+
/*
672+
* Wake up any background push waiters now this context is being pushed.
673+
*/
674+
wake_up_all(&ctx->push_wait);
675+
671676
/*
672677
* Check if we've anything to push. If there is nothing, then we don't
673678
* move on to a new sequence number and so we have to be able to push
@@ -744,6 +749,7 @@ xlog_cil_push_work(
744749
*/
745750
INIT_LIST_HEAD(&new_ctx->committing);
746751
INIT_LIST_HEAD(&new_ctx->busy_extents);
752+
init_waitqueue_head(&new_ctx->push_wait);
747753
new_ctx->sequence = ctx->sequence + 1;
748754
new_ctx->cil = cil;
749755
cil->xc_ctx = new_ctx;
@@ -891,7 +897,7 @@ xlog_cil_push_work(
891897
*/
892898
static void
893899
xlog_cil_push_background(
894-
struct xlog *log)
900+
struct xlog *log) __releases(cil->xc_ctx_lock)
895901
{
896902
struct xfs_cil *cil = log->l_cilp;
897903

@@ -905,14 +911,36 @@ xlog_cil_push_background(
905911
* don't do a background push if we haven't used up all the
906912
* space available yet.
907913
*/
908-
if (cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log))
914+
if (cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log)) {
915+
up_read(&cil->xc_ctx_lock);
909916
return;
917+
}
910918

911919
spin_lock(&cil->xc_push_lock);
912920
if (cil->xc_push_seq < cil->xc_current_sequence) {
913921
cil->xc_push_seq = cil->xc_current_sequence;
914922
queue_work(log->l_mp->m_cil_workqueue, &cil->xc_push_work);
915923
}
924+
925+
/*
926+
* Drop the context lock now, we can't hold that if we need to sleep
927+
* because we are over the blocking threshold. The push_lock is still
928+
* held, so blocking threshold sleep/wakeup is still correctly
929+
* serialised here.
930+
*/
931+
up_read(&cil->xc_ctx_lock);
932+
933+
/*
934+
* If we are well over the space limit, throttle the work that is being
935+
* done until the push work on this context has begun.
936+
*/
937+
if (cil->xc_ctx->space_used >= XLOG_CIL_BLOCKING_SPACE_LIMIT(log)) {
938+
trace_xfs_log_cil_wait(log, cil->xc_ctx->ticket);
939+
ASSERT(cil->xc_ctx->space_used < log->l_logsize);
940+
xlog_wait(&cil->xc_ctx->push_wait, &cil->xc_push_lock);
941+
return;
942+
}
943+
916944
spin_unlock(&cil->xc_push_lock);
917945

918946
}
@@ -1032,9 +1060,9 @@ xfs_log_commit_cil(
10321060
if (lip->li_ops->iop_committing)
10331061
lip->li_ops->iop_committing(lip, xc_commit_lsn);
10341062
}
1035-
xlog_cil_push_background(log);
10361063

1037-
up_read(&cil->xc_ctx_lock);
1064+
/* xlog_cil_push_background() releases cil->xc_ctx_lock */
1065+
xlog_cil_push_background(log);
10381066
}
10391067

10401068
/*
@@ -1193,6 +1221,7 @@ xlog_cil_init(
11931221

11941222
INIT_LIST_HEAD(&ctx->committing);
11951223
INIT_LIST_HEAD(&ctx->busy_extents);
1224+
init_waitqueue_head(&ctx->push_wait);
11961225
ctx->sequence = 1;
11971226
ctx->cil = cil;
11981227
cil->xc_ctx = ctx;

fs/xfs/xfs_log_priv.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,7 @@ struct xfs_cil_ctx {
240240
struct xfs_log_vec *lv_chain; /* logvecs being pushed */
241241
struct list_head iclog_entry;
242242
struct list_head committing; /* ctx committing list */
243+
wait_queue_head_t push_wait; /* background push throttle */
243244
struct work_struct discard_endio_work;
244245
};
245246

@@ -337,10 +338,33 @@ struct xfs_cil {
337338
* buffer window (32MB) as measurements have shown this to be roughly the
338339
* point of diminishing performance increases under highly concurrent
339340
* modification workloads.
341+
*
342+
* To prevent the CIL from overflowing upper commit size bounds, we introduce a
343+
* new threshold at which we block committing transactions until the background
344+
* CIL commit commences and switches to a new context. While this is not a hard
345+
* limit, it forces the process committing a transaction to the CIL to block and
346+
* yield the CPU, giving the CIL push work a chance to be scheduled and start
347+
* work. This prevents a process running lots of transactions from overfilling
348+
* the CIL because it is not yielding the CPU. We set the blocking limit at
349+
* twice the background push space threshold so we keep in line with the AIL
350+
* push thresholds.
351+
*
352+
* Note: this is not a -hard- limit as blocking is applied after the transaction
353+
* is inserted into the CIL and the push has been triggered. It is largely a
354+
* throttling mechanism that allows the CIL push to be scheduled and run. A hard
355+
* limit will be difficult to implement without introducing global serialisation
356+
* in the CIL commit fast path, and it's not at all clear that we actually need
357+
* such hard limits given the ~7 years we've run without a hard limit before
358+
* finding the first situation where a checkpoint size overflow actually
359+
* occurred. Hence the simple throttle, and an ASSERT check to tell us that
360+
* we've overrun the max size.
340361
*/
341362
#define XLOG_CIL_SPACE_LIMIT(log) \
342363
min_t(int, (log)->l_logsize >> 3, BBTOB(XLOG_TOTAL_REC_SHIFT(log)) << 4)
343364

365+
#define XLOG_CIL_BLOCKING_SPACE_LIMIT(log) \
366+
(XLOG_CIL_SPACE_LIMIT(log) * 2)
367+
344368
/*
345369
* ticket grant locks, queues and accounting have their own cachelines
346370
* as these are quite hot and can be operated on concurrently.

fs/xfs/xfs_trace.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1015,6 +1015,7 @@ DEFINE_LOGGRANT_EVENT(xfs_log_ticket_regrant_sub);
10151015
DEFINE_LOGGRANT_EVENT(xfs_log_ticket_ungrant);
10161016
DEFINE_LOGGRANT_EVENT(xfs_log_ticket_ungrant_sub);
10171017
DEFINE_LOGGRANT_EVENT(xfs_log_ticket_ungrant_exit);
1018+
DEFINE_LOGGRANT_EVENT(xfs_log_cil_wait);
10181019

10191020
DECLARE_EVENT_CLASS(xfs_log_item_class,
10201021
TP_PROTO(struct xfs_log_item *lip),

0 commit comments

Comments
 (0)