diff options
-rw-r--r-- | fs/xfs/xfs_log_cil.c | 29 | ||||
-rw-r--r-- | fs/xfs/xfs_log_priv.h | 45 |
2 files changed, 70 insertions, 4 deletions
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 53abd6b0a33..9b21f80f31c 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c | |||
@@ -336,6 +336,7 @@ xfs_log_commit_cil( | |||
336 | { | 336 | { |
337 | struct log *log = mp->m_log; | 337 | struct log *log = mp->m_log; |
338 | int log_flags = 0; | 338 | int log_flags = 0; |
339 | int push = 0; | ||
339 | 340 | ||
340 | if (flags & XFS_TRANS_RELEASE_LOG_RES) | 341 | if (flags & XFS_TRANS_RELEASE_LOG_RES) |
341 | log_flags = XFS_LOG_REL_PERM_RESERV; | 342 | log_flags = XFS_LOG_REL_PERM_RESERV; |
@@ -365,8 +366,20 @@ xfs_log_commit_cil( | |||
365 | xfs_log_done(mp, tp->t_ticket, NULL, log_flags); | 366 | xfs_log_done(mp, tp->t_ticket, NULL, log_flags); |
366 | xfs_trans_unreserve_and_mod_sb(tp); | 367 | xfs_trans_unreserve_and_mod_sb(tp); |
367 | 368 | ||
368 | /* background commit is allowed again */ | 369 | /* check for background commit before unlock */ |
370 | if (log->l_cilp->xc_ctx->space_used > XLOG_CIL_SPACE_LIMIT(log)) | ||
371 | push = 1; | ||
369 | up_read(&log->l_cilp->xc_ctx_lock); | 372 | up_read(&log->l_cilp->xc_ctx_lock); |
373 | |||
374 | /* | ||
375 | * We need to push the CIL every so often so we don't cache more than we | ||
376 | * can fit in the log. The limit really is that a checkpoint can't be | ||
377 | * more than half the log (the current checkpoint is not allowed to | ||
378 | * overwrite the previous checkpoint), but commit latency and memory | ||
379 | * usage limit this to a smaller size in most cases. | ||
380 | */ | ||
381 | if (push) | ||
382 | xlog_cil_push(log, 0); | ||
370 | return 0; | 383 | return 0; |
371 | } | 384 | } |
372 | 385 | ||
@@ -429,18 +442,25 @@ xlog_cil_push( | |||
429 | if (!cil) | 442 | if (!cil) |
430 | return 0; | 443 | return 0; |
431 | 444 | ||
432 | /* XXX: don't sleep for background? */ | ||
433 | new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS); | 445 | new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS); |
434 | new_ctx->ticket = xlog_cil_ticket_alloc(log); | 446 | new_ctx->ticket = xlog_cil_ticket_alloc(log); |
435 | 447 | ||
436 | /* lock out transaction commit */ | 448 | /* lock out transaction commit, but don't block on background push */ |
437 | down_write(&cil->xc_ctx_lock); | 449 | if (!down_write_trylock(&cil->xc_ctx_lock)) { |
450 | if (!push_now) | ||
451 | goto out_free_ticket; | ||
452 | down_write(&cil->xc_ctx_lock); | ||
453 | } | ||
438 | ctx = cil->xc_ctx; | 454 | ctx = cil->xc_ctx; |
439 | 455 | ||
440 | /* check if we've anything to push */ | 456 | /* check if we've anything to push */ |
441 | if (list_empty(&cil->xc_cil)) | 457 | if (list_empty(&cil->xc_cil)) |
442 | goto out_skip; | 458 | goto out_skip; |
443 | 459 | ||
460 | /* check for spurious background flush */ | ||
461 | if (!push_now && cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log)) | ||
462 | goto out_skip; | ||
463 | |||
444 | /* | 464 | /* |
445 | * pull all the log vectors off the items in the CIL, and | 465 | * pull all the log vectors off the items in the CIL, and |
446 | * remove the items from the CIL. We don't need the CIL lock | 466 | * remove the items from the CIL. We don't need the CIL lock |
@@ -584,6 +604,7 @@ restart: | |||
584 | 604 | ||
585 | out_skip: | 605 | out_skip: |
586 | up_write(&cil->xc_ctx_lock); | 606 | up_write(&cil->xc_ctx_lock); |
607 | out_free_ticket: | ||
587 | xfs_log_ticket_put(new_ctx->ticket); | 608 | xfs_log_ticket_put(new_ctx->ticket); |
588 | kmem_free(new_ctx); | 609 | kmem_free(new_ctx); |
589 | return 0; | 610 | return 0; |
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 48d920891b9..8c072618965 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h | |||
@@ -425,6 +425,51 @@ struct xfs_cil { | |||
425 | }; | 425 | }; |
426 | 426 | ||
427 | /* | 427 | /* |
428 | * The amount of log space we should allow the CIL to aggregate is difficult to size. | ||
429 | * Whatever we choose, we have to make sure we can get a reservation for the log space | ||
430 | * effectively, that it is large enough to capture sufficient relogging to | ||
431 | * reduce log buffer IO significantly, but that it is not too large for the log and | ||
432 | * does not induce too much latency when writing out through the iclogs. We track both | ||
433 | * space consumed and the number of vectors in the checkpoint context, so we | ||
434 | * need to decide which to use for limiting. | ||
435 | * | ||
436 | * Every log buffer we write out during a push needs a header reserved, which | ||
437 | * is at least one sector and more for v2 logs. Hence we need a reservation of | ||
438 | * at least 512 bytes per 32k of log space just for the LR headers. That means | ||
439 | * 16KB of reservation per megabyte of delayed logging space we will consume, | ||
440 | * plus various headers. The number of headers will vary based on the number of | ||
441 | * io vectors, so limiting on a specific number of vectors is going to result | ||
442 | * in transactions of varying size. IOWs, it is more consistent to track and | ||
443 | * limit space consumed in the log rather than by the number of objects being | ||
444 | * logged in order to prevent checkpoint ticket overruns. | ||
445 | * | ||
446 | * Further, use of static reservations through the log grant mechanism is | ||
447 | * problematic. It introduces a lot of complexity (e.g. reserve grant vs write | ||
448 | * grant) and a significant deadlock potential because regranting write space | ||
449 | * can block on log pushes. Hence if we have to regrant log space during a log | ||
450 | * push, we can deadlock. | ||
451 | * | ||
452 | * However, we can avoid this by use of a dynamic "reservation stealing" | ||
453 | * technique during transaction commit whereby unused reservation space in the | ||
454 | * transaction ticket is transferred to the CIL ctx commit ticket to cover the | ||
455 | * space needed by the checkpoint transaction. This means that we never need to | ||
456 | * specifically reserve space for the CIL checkpoint transaction, nor do we | ||
457 | * need to regrant space once the checkpoint completes. This also means the | ||
458 | * checkpoint transaction ticket is specific to the checkpoint context, rather | ||
459 | * than the CIL itself. | ||
460 | * | ||
461 | * With dynamic reservations, we can basically make up arbitrary limits for the | ||
462 | * checkpoint size so long as they don't violate any other size rules. Hence | ||
463 | * the initial maximum size for the checkpoint transaction will be set to a | ||
464 | * quarter of the log or 8MB, whichever is smaller. 8MB is an arbitrary limit | ||
465 | * right now based on the latency of writing out a large amount of data through | ||
466 | * the circular iclog buffers. | ||
467 | */ | ||
468 | |||
469 | #define XLOG_CIL_SPACE_LIMIT(log) \ | ||
470 | (min((log->l_logsize >> 2), (8 * 1024 * 1024))) | ||
471 | |||
472 | /* | ||
428 | * The reservation head lsn is not made up of a cycle number and block number. | 473 | * The reservation head lsn is not made up of a cycle number and block number. |
429 | * Instead, it uses a cycle number and byte number. Logs don't expect to | 474 | * Instead, it uses a cycle number and byte number. Logs don't expect to |
430 | * overflow 31 bits worth of byte offset, so using a byte number will mean | 475 | * overflow 31 bits worth of byte offset, so using a byte number will mean |