aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorDave Chinner <dchinner@redhat.com>2010-05-17 01:52:13 -0400
committerAlex Elder <aelder@sgi.com>2010-05-24 11:38:20 -0400
commitdf806158b0f6eb24247773b4a19b8b59d7217e59 (patch)
treea6fb142258aabf03011aadd14c9cf6ade9033d58 /fs
parent9da1ab181ac1790f86528b86ba5876f037e8dcdc (diff)
xfs: enable background pushing of the CIL
If we let the CIL grow without bound, it will grow large enough to violate recovery constraints (there must be at least one complete transaction in the log at all times) or take forever to write out through the log buffers. Hence we need a check during asynchronous transactions as to whether the CIL needs to be pushed. We track the amount of log space the CIL consumes, so it is relatively simple to limit it on a pure size basis. Make the limit the minimum of a quarter of the log size (comfortably inside the recovery constraint of just under half the log) or 8MB of log space (which is an awful lot of metadata). Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Alex Elder <aelder@sgi.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/xfs/xfs_log_cil.c29
-rw-r--r--fs/xfs/xfs_log_priv.h45
2 files changed, 70 insertions, 4 deletions
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 53abd6b0a333..9b21f80f31ce 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -336,6 +336,7 @@ xfs_log_commit_cil(
336{ 336{
337 struct log *log = mp->m_log; 337 struct log *log = mp->m_log;
338 int log_flags = 0; 338 int log_flags = 0;
339 int push = 0;
339 340
340 if (flags & XFS_TRANS_RELEASE_LOG_RES) 341 if (flags & XFS_TRANS_RELEASE_LOG_RES)
341 log_flags = XFS_LOG_REL_PERM_RESERV; 342 log_flags = XFS_LOG_REL_PERM_RESERV;
@@ -365,8 +366,20 @@ xfs_log_commit_cil(
365 xfs_log_done(mp, tp->t_ticket, NULL, log_flags); 366 xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
366 xfs_trans_unreserve_and_mod_sb(tp); 367 xfs_trans_unreserve_and_mod_sb(tp);
367 368
368 /* background commit is allowed again */ 369 /* check for background commit before unlock */
370 if (log->l_cilp->xc_ctx->space_used > XLOG_CIL_SPACE_LIMIT(log))
371 push = 1;
369 up_read(&log->l_cilp->xc_ctx_lock); 372 up_read(&log->l_cilp->xc_ctx_lock);
373
374 /*
375 * We need to push CIL every so often so we don't cache more than we
376 * can fit in the log. The limit really is that a checkpoint can't be
377 * more than half the log (the current checkpoint is not allowed to
378 * overwrite the previous checkpoint), but commit latency and memory
379 * usage limit this to a smaller size in most cases.
380 */
381 if (push)
382 xlog_cil_push(log, 0);
370 return 0; 383 return 0;
371} 384}
372 385
@@ -429,18 +442,25 @@ xlog_cil_push(
429 if (!cil) 442 if (!cil)
430 return 0; 443 return 0;
431 444
432 /* XXX: don't sleep for background? */
433 new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS); 445 new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS);
434 new_ctx->ticket = xlog_cil_ticket_alloc(log); 446 new_ctx->ticket = xlog_cil_ticket_alloc(log);
435 447
436 /* lock out transaction commit */ 448 /* lock out transaction commit, but don't block on background push */
437 down_write(&cil->xc_ctx_lock); 449 if (!down_write_trylock(&cil->xc_ctx_lock)) {
450 if (!push_now)
451 goto out_free_ticket;
452 down_write(&cil->xc_ctx_lock);
453 }
438 ctx = cil->xc_ctx; 454 ctx = cil->xc_ctx;
439 455
440 /* check if we've anything to push */ 456 /* check if we've anything to push */
441 if (list_empty(&cil->xc_cil)) 457 if (list_empty(&cil->xc_cil))
442 goto out_skip; 458 goto out_skip;
443 459
460 /* check for spurious background flush */
461 if (!push_now && cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log))
462 goto out_skip;
463
444 /* 464 /*
445 * pull all the log vectors off the items in the CIL, and 465 * pull all the log vectors off the items in the CIL, and
446 * remove the items from the CIL. We don't need the CIL lock 466 * remove the items from the CIL. We don't need the CIL lock
@@ -584,6 +604,7 @@ restart:
584 604
585out_skip: 605out_skip:
586 up_write(&cil->xc_ctx_lock); 606 up_write(&cil->xc_ctx_lock);
607out_free_ticket:
587 xfs_log_ticket_put(new_ctx->ticket); 608 xfs_log_ticket_put(new_ctx->ticket);
588 kmem_free(new_ctx); 609 kmem_free(new_ctx);
589 return 0; 610 return 0;
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 48d920891b94..8c072618965c 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -425,6 +425,51 @@ struct xfs_cil {
425}; 425};
426 426
427/* 427/*
428 * The amount of log space we should allow the CIL to aggregate is difficult to size.
429 * Whatever we choose, we have to make sure we can get a reservation for the log space
430 * effectively, that it is large enough to capture sufficient relogging to
431 * reduce log buffer IO significantly, but that it is not too large for the log and
432 * does not induce too much latency when writing out through the iclogs. We track both
433 * space consumed and the number of vectors in the checkpoint context, so we
434 * need to decide which to use for limiting.
435 *
436 * Every log buffer we write out during a push needs a header reserved, which
437 * is at least one sector and more for v2 logs. Hence we need a reservation of
438 * at least 512 bytes per 32k of log space just for the LR headers. That means
439 * 16KB of reservation per megabyte of delayed logging space we will consume,
440 * plus various headers. The number of headers will vary based on the num of
441 * io vectors, so limiting on a specific number of vectors is going to result
442 * in transactions of varying size. IOWs, it is more consistent to track and
443 * limit space consumed in the log rather than by the number of objects being
444 * logged in order to prevent checkpoint ticket overruns.
445 *
446 * Further, use of static reservations through the log grant mechanism is
447 * problematic. It introduces a lot of complexity (e.g. reserve grant vs write
448 * grant) and a significant deadlock potential because regranting write space
449 * can block on log pushes. Hence if we have to regrant log space during a log
450 * push, we can deadlock.
451 *
452 * However, we can avoid this by use of a dynamic "reservation stealing"
453 * technique during transaction commit whereby unused reservation space in the
454 * transaction ticket is transferred to the CIL ctx commit ticket to cover the
455 * space needed by the checkpoint transaction. This means that we never need to
456 * specifically reserve space for the CIL checkpoint transaction, nor do we
457 * need to regrant space once the checkpoint completes. This also means the
458 * checkpoint transaction ticket is specific to the checkpoint context, rather
459 * than the CIL itself.
460 *
461 * With dynamic reservations, we can basically make up arbitrary limits for the
462 * checkpoint size so long as they don't violate any other size rules. Hence
463 * the initial maximum size for the checkpoint transaction will be set to a
464 * quarter of the log or 8MB, whichever is smaller. 8MB is an arbitrary limit
465 * right now based on the latency of writing out a large amount of data through
466 * the circular iclog buffers.
467 */
468
469#define XLOG_CIL_SPACE_LIMIT(log) \
470 (min((log->l_logsize >> 2), (8 * 1024 * 1024)))
471
472/*
428 * The reservation head lsn is not made up of a cycle number and block number. 473 * The reservation head lsn is not made up of a cycle number and block number.
429 * Instead, it uses a cycle number and byte number. Logs don't expect to 474 * Instead, it uses a cycle number and byte number. Logs don't expect to
430 * overflow 31 bits worth of byte offset, so using a byte number will mean 475 * overflow 31 bits worth of byte offset, so using a byte number will mean