aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/xfs/xfs_log_cil.c29
-rw-r--r--fs/xfs/xfs_log_priv.h45
2 files changed, 70 insertions, 4 deletions
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 53abd6b0a33..9b21f80f31c 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -336,6 +336,7 @@ xfs_log_commit_cil(
336{ 336{
337 struct log *log = mp->m_log; 337 struct log *log = mp->m_log;
338 int log_flags = 0; 338 int log_flags = 0;
339 int push = 0;
339 340
340 if (flags & XFS_TRANS_RELEASE_LOG_RES) 341 if (flags & XFS_TRANS_RELEASE_LOG_RES)
341 log_flags = XFS_LOG_REL_PERM_RESERV; 342 log_flags = XFS_LOG_REL_PERM_RESERV;
@@ -365,8 +366,20 @@ xfs_log_commit_cil(
365 xfs_log_done(mp, tp->t_ticket, NULL, log_flags); 366 xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
366 xfs_trans_unreserve_and_mod_sb(tp); 367 xfs_trans_unreserve_and_mod_sb(tp);
367 368
368 /* background commit is allowed again */ 369 /* check for background commit before unlock */
370 if (log->l_cilp->xc_ctx->space_used > XLOG_CIL_SPACE_LIMIT(log))
371 push = 1;
369 up_read(&log->l_cilp->xc_ctx_lock); 372 up_read(&log->l_cilp->xc_ctx_lock);
373
374 /*
375 * We need to push CIL every so often so we don't cache more than we
376 * can fit in the log. The limit really is that a checkpoint can't be
377 * more than half the log (the current checkpoint is not allowed to
378 * overwrite the previous checkpoint), but commit latency and memory
379 * usage limit this to a smaller size in most cases.
380 */
381 if (push)
382 xlog_cil_push(log, 0);
370 return 0; 383 return 0;
371} 384}
372 385
@@ -429,18 +442,25 @@ xlog_cil_push(
429 if (!cil) 442 if (!cil)
430 return 0; 443 return 0;
431 444
432 /* XXX: don't sleep for background? */
433 new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS); 445 new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS);
434 new_ctx->ticket = xlog_cil_ticket_alloc(log); 446 new_ctx->ticket = xlog_cil_ticket_alloc(log);
435 447
436 /* lock out transaction commit */ 448 /* lock out transaction commit, but don't block on background push */
437 down_write(&cil->xc_ctx_lock); 449 if (!down_write_trylock(&cil->xc_ctx_lock)) {
450 if (!push_now)
451 goto out_free_ticket;
452 down_write(&cil->xc_ctx_lock);
453 }
438 ctx = cil->xc_ctx; 454 ctx = cil->xc_ctx;
439 455
440 /* check if we've anything to push */ 456 /* check if we've anything to push */
441 if (list_empty(&cil->xc_cil)) 457 if (list_empty(&cil->xc_cil))
442 goto out_skip; 458 goto out_skip;
443 459
460 /* check for spurious background flush */
461 if (!push_now && cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log))
462 goto out_skip;
463
444 /* 464 /*
445 * pull all the log vectors off the items in the CIL, and 465 * pull all the log vectors off the items in the CIL, and
446 * remove the items from the CIL. We don't need the CIL lock 466 * remove the items from the CIL. We don't need the CIL lock
@@ -584,6 +604,7 @@ restart:
584 604
585out_skip: 605out_skip:
586 up_write(&cil->xc_ctx_lock); 606 up_write(&cil->xc_ctx_lock);
607out_free_ticket:
587 xfs_log_ticket_put(new_ctx->ticket); 608 xfs_log_ticket_put(new_ctx->ticket);
588 kmem_free(new_ctx); 609 kmem_free(new_ctx);
589 return 0; 610 return 0;
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 48d920891b9..8c072618965 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -425,6 +425,51 @@ struct xfs_cil {
425}; 425};
426 426
427/* 427/*
428 * The amount of log space we should allow the CIL to aggregate is difficult to size.
429 * Whatever we choose, we have to make sure we can get a reservation for the log space
430 * effectively, that it is large enough to capture sufficient relogging to
431 * reduce log buffer IO significantly, but that it is not too large for the log and
432 * does not induce too much latency when writing out through the iclogs. We track both
433 * space consumed and the number of vectors in the checkpoint context, so we
434 * need to decide which to use for limiting.
435 *
436 * Every log buffer we write out during a push needs a header reserved, which
437 * is at least one sector and more for v2 logs. Hence we need a reservation of
438 * at least 512 bytes per 32k of log space just for the LR headers. That means
439 * 16KB of reservation per megabyte of delayed logging space we will consume,
440 * plus various headers. The number of headers will vary based on the number of
441 * io vectors, so limiting on a specific number of vectors is going to result
442 * in transactions of varying size. IOWs, it is more consistent to track and
443 * limit space consumed in the log rather than by the number of objects being
444 * logged in order to prevent checkpoint ticket overruns.
445 *
446 * Further, use of static reservations through the log grant mechanism is
447 * problematic. It introduces a lot of complexity (e.g. reserve grant vs write
448 * grant) and a significant deadlock potential because regranting write space
449 * can block on log pushes. Hence if we have to regrant log space during a log
450 * push, we can deadlock.
451 *
452 * However, we can avoid this by use of a dynamic "reservation stealing"
453 * technique during transaction commit whereby unused reservation space in the
454 * transaction ticket is transferred to the CIL ctx commit ticket to cover the
455 * space needed by the checkpoint transaction. This means that we never need to
456 * specifically reserve space for the CIL checkpoint transaction, nor do we
457 * need to regrant space once the checkpoint completes. This also means the
458 * checkpoint transaction ticket is specific to the checkpoint context, rather
459 * than the CIL itself.
460 *
461 * With dynamic reservations, we can basically make up arbitrary limits for the
462 * checkpoint size so long as they don't violate any other size rules. Hence
463 * the initial maximum size for the checkpoint transaction will be set to a
464 * quarter of the log or 8MB, whichever is smaller. 8MB is an arbitrary limit
465 * right now based on the latency of writing out a large amount of data through
466 * the circular iclog buffers.
467 */
468
469#define XLOG_CIL_SPACE_LIMIT(log) \
470 (min((log->l_logsize >> 2), (8 * 1024 * 1024)))
471
472/*
428 * The reservation head lsn is not made up of a cycle number and block number. 473 * The reservation head lsn is not made up of a cycle number and block number.
429 * Instead, it uses a cycle number and byte number. Logs don't expect to 474 * Instead, it uses a cycle number and byte number. Logs don't expect to
430 * overflow 31 bits worth of byte offset, so using a byte number will mean 475 * overflow 31 bits worth of byte offset, so using a byte number will mean