diff options
author | Jens Axboe <jaxboe@fusionio.com> | 2010-10-19 03:13:04 -0400 |
---|---|---|
committer | Jens Axboe <jaxboe@fusionio.com> | 2010-10-19 03:13:04 -0400 |
commit | fa251f89903d73989e2f63e13d0eaed1e07ce0da (patch) | |
tree | 3f7fe779941e3b6d67754dd7c44a32f48ea47c74 /fs/xfs/xfs_log_cil.c | |
parent | dd3932eddf428571762596e17b65f5dc92ca361b (diff) | |
parent | cd07202cc8262e1669edff0d97715f3dd9260917 (diff) |
Merge branch 'v2.6.36-rc8' into for-2.6.37/barrier
Conflicts:
block/blk-core.c
drivers/block/loop.c
mm/swapfile.c
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
Diffstat (limited to 'fs/xfs/xfs_log_cil.c')
-rw-r--r-- | fs/xfs/xfs_log_cil.c | 271 |
1 files changed, 164 insertions, 107 deletions
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 31e4ea2d19ac..7e206fc1fa36 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c | |||
@@ -68,6 +68,7 @@ xlog_cil_init( | |||
68 | ctx->sequence = 1; | 68 | ctx->sequence = 1; |
69 | ctx->cil = cil; | 69 | ctx->cil = cil; |
70 | cil->xc_ctx = ctx; | 70 | cil->xc_ctx = ctx; |
71 | cil->xc_current_sequence = ctx->sequence; | ||
71 | 72 | ||
72 | cil->xc_log = log; | 73 | cil->xc_log = log; |
73 | log->l_cilp = cil; | 74 | log->l_cilp = cil; |
@@ -269,15 +270,10 @@ xlog_cil_insert( | |||
269 | static void | 270 | static void |
270 | xlog_cil_format_items( | 271 | xlog_cil_format_items( |
271 | struct log *log, | 272 | struct log *log, |
272 | struct xfs_log_vec *log_vector, | 273 | struct xfs_log_vec *log_vector) |
273 | struct xlog_ticket *ticket, | ||
274 | xfs_lsn_t *start_lsn) | ||
275 | { | 274 | { |
276 | struct xfs_log_vec *lv; | 275 | struct xfs_log_vec *lv; |
277 | 276 | ||
278 | if (start_lsn) | ||
279 | *start_lsn = log->l_cilp->xc_ctx->sequence; | ||
280 | |||
281 | ASSERT(log_vector); | 277 | ASSERT(log_vector); |
282 | for (lv = log_vector; lv; lv = lv->lv_next) { | 278 | for (lv = log_vector; lv; lv = lv->lv_next) { |
283 | void *ptr; | 279 | void *ptr; |
@@ -301,9 +297,24 @@ xlog_cil_format_items( | |||
301 | ptr += vec->i_len; | 297 | ptr += vec->i_len; |
302 | } | 298 | } |
303 | ASSERT(ptr == lv->lv_buf + lv->lv_buf_len); | 299 | ASSERT(ptr == lv->lv_buf + lv->lv_buf_len); |
300 | } | ||
301 | } | ||
304 | 302 | ||
303 | static void | ||
304 | xlog_cil_insert_items( | ||
305 | struct log *log, | ||
306 | struct xfs_log_vec *log_vector, | ||
307 | struct xlog_ticket *ticket, | ||
308 | xfs_lsn_t *start_lsn) | ||
309 | { | ||
310 | struct xfs_log_vec *lv; | ||
311 | |||
312 | if (start_lsn) | ||
313 | *start_lsn = log->l_cilp->xc_ctx->sequence; | ||
314 | |||
315 | ASSERT(log_vector); | ||
316 | for (lv = log_vector; lv; lv = lv->lv_next) | ||
305 | xlog_cil_insert(log, ticket, lv->lv_item, lv); | 317 | xlog_cil_insert(log, ticket, lv->lv_item, lv); |
306 | } | ||
307 | } | 318 | } |
308 | 319 | ||
309 | static void | 320 | static void |
@@ -321,80 +332,6 @@ xlog_cil_free_logvec( | |||
321 | } | 332 | } |
322 | 333 | ||
323 | /* | 334 | /* |
324 | * Commit a transaction with the given vector to the Committed Item List. | ||
325 | * | ||
326 | * To do this, we need to format the item, pin it in memory if required and | ||
327 | * account for the space used by the transaction. Once we have done that we | ||
328 | * need to release the unused reservation for the transaction, attach the | ||
329 | * transaction to the checkpoint context so we carry the busy extents through | ||
330 | * to checkpoint completion, and then unlock all the items in the transaction. | ||
331 | * | ||
332 | * For more specific information about the order of operations in | ||
333 | * xfs_log_commit_cil() please refer to the comments in | ||
334 | * xfs_trans_commit_iclog(). | ||
335 | * | ||
336 | * Called with the context lock already held in read mode to lock out | ||
337 | * background commit, returns without it held once background commits are | ||
338 | * allowed again. | ||
339 | */ | ||
340 | int | ||
341 | xfs_log_commit_cil( | ||
342 | struct xfs_mount *mp, | ||
343 | struct xfs_trans *tp, | ||
344 | struct xfs_log_vec *log_vector, | ||
345 | xfs_lsn_t *commit_lsn, | ||
346 | int flags) | ||
347 | { | ||
348 | struct log *log = mp->m_log; | ||
349 | int log_flags = 0; | ||
350 | int push = 0; | ||
351 | |||
352 | if (flags & XFS_TRANS_RELEASE_LOG_RES) | ||
353 | log_flags = XFS_LOG_REL_PERM_RESERV; | ||
354 | |||
355 | if (XLOG_FORCED_SHUTDOWN(log)) { | ||
356 | xlog_cil_free_logvec(log_vector); | ||
357 | return XFS_ERROR(EIO); | ||
358 | } | ||
359 | |||
360 | /* lock out background commit */ | ||
361 | down_read(&log->l_cilp->xc_ctx_lock); | ||
362 | xlog_cil_format_items(log, log_vector, tp->t_ticket, commit_lsn); | ||
363 | |||
364 | /* check we didn't blow the reservation */ | ||
365 | if (tp->t_ticket->t_curr_res < 0) | ||
366 | xlog_print_tic_res(log->l_mp, tp->t_ticket); | ||
367 | |||
368 | /* attach the transaction to the CIL if it has any busy extents */ | ||
369 | if (!list_empty(&tp->t_busy)) { | ||
370 | spin_lock(&log->l_cilp->xc_cil_lock); | ||
371 | list_splice_init(&tp->t_busy, | ||
372 | &log->l_cilp->xc_ctx->busy_extents); | ||
373 | spin_unlock(&log->l_cilp->xc_cil_lock); | ||
374 | } | ||
375 | |||
376 | tp->t_commit_lsn = *commit_lsn; | ||
377 | xfs_log_done(mp, tp->t_ticket, NULL, log_flags); | ||
378 | xfs_trans_unreserve_and_mod_sb(tp); | ||
379 | |||
380 | /* check for background commit before unlock */ | ||
381 | if (log->l_cilp->xc_ctx->space_used > XLOG_CIL_SPACE_LIMIT(log)) | ||
382 | push = 1; | ||
383 | up_read(&log->l_cilp->xc_ctx_lock); | ||
384 | |||
385 | /* | ||
386 | * We need to push CIL every so often so we don't cache more than we | ||
387 | * can fit in the log. The limit really is that a checkpoint can't be | ||
388 | * more than half the log (the current checkpoint is not allowed to | ||
389 | * overwrite the previous checkpoint), but commit latency and memory | ||
390 | * usage limit this to a smaller size in most cases. | ||
391 | */ | ||
392 | if (push) | ||
393 | xlog_cil_push(log, 0); | ||
394 | return 0; | ||
395 | } | ||
396 | |||
397 | /* | ||
398 | * Mark all items committed and clear busy extents. We free the log vector | 335 | * Mark all items committed and clear busy extents. We free the log vector |
399 | * chains in a separate pass so that we unpin the log items as quickly as | 336 | * chains in a separate pass so that we unpin the log items as quickly as |
400 | * possible. | 337 | * possible. |
@@ -427,13 +364,23 @@ xlog_cil_committed( | |||
427 | } | 364 | } |
428 | 365 | ||
429 | /* | 366 | /* |
430 | * Push the Committed Item List to the log. If the push_now flag is not set, | 367 | * Push the Committed Item List to the log. If @push_seq flag is zero, then it |
431 | then it is a background flush and so we can choose to ignore it. | 368 | is a background flush and so we can choose to ignore it. Otherwise, if the |
369 | * current sequence is the same as @push_seq we need to do a flush. If | ||
370 | * @push_seq is less than the current sequence, then it has already been | ||
371 | * flushed and we don't need to do anything - the caller will wait for it to | ||
372 | * complete if necessary. | ||
373 | * | ||
374 | * @push_seq is a value rather than a flag because that allows us to do an | ||
375 | unlocked check of the sequence number for a match. Hence we can allow log | ||
376 | * forces to run racily and not issue pushes for the same sequence twice. If we | ||
377 | * get a race between multiple pushes for the same sequence they will block on | ||
378 | * the first one and then abort, hence avoiding needless pushes. | ||
432 | */ | 379 | */ |
433 | int | 380 | STATIC int |
434 | xlog_cil_push( | 381 | xlog_cil_push( |
435 | struct log *log, | 382 | struct log *log, |
436 | int push_now) | 383 | xfs_lsn_t push_seq) |
437 | { | 384 | { |
438 | struct xfs_cil *cil = log->l_cilp; | 385 | struct xfs_cil *cil = log->l_cilp; |
439 | struct xfs_log_vec *lv; | 386 | struct xfs_log_vec *lv; |
@@ -453,12 +400,20 @@ xlog_cil_push( | |||
453 | if (!cil) | 400 | if (!cil) |
454 | return 0; | 401 | return 0; |
455 | 402 | ||
403 | ASSERT(!push_seq || push_seq <= cil->xc_ctx->sequence); | ||
404 | |||
456 | new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS); | 405 | new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS); |
457 | new_ctx->ticket = xlog_cil_ticket_alloc(log); | 406 | new_ctx->ticket = xlog_cil_ticket_alloc(log); |
458 | 407 | ||
459 | /* lock out transaction commit, but don't block on background push */ | 408 | /* |
409 | * Lock out transaction commit, but don't block for background pushes | ||
410 | * unless we are well over the CIL space limit. See the definition of | ||
411 | * XLOG_CIL_HARD_SPACE_LIMIT() for the full explanation of the logic | ||
412 | * used here. | ||
413 | */ | ||
460 | if (!down_write_trylock(&cil->xc_ctx_lock)) { | 414 | if (!down_write_trylock(&cil->xc_ctx_lock)) { |
461 | if (!push_now) | 415 | if (!push_seq && |
416 | cil->xc_ctx->space_used < XLOG_CIL_HARD_SPACE_LIMIT(log)) | ||
462 | goto out_free_ticket; | 417 | goto out_free_ticket; |
463 | down_write(&cil->xc_ctx_lock); | 418 | down_write(&cil->xc_ctx_lock); |
464 | } | 419 | } |
@@ -469,7 +424,11 @@ xlog_cil_push( | |||
469 | goto out_skip; | 424 | goto out_skip; |
470 | 425 | ||
471 | /* check for spurious background flush */ | 426 | /* check for spurious background flush */ |
472 | if (!push_now && cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log)) | 427 | if (!push_seq && cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log)) |
428 | goto out_skip; | ||
429 | |||
430 | /* check for a previously pushed sequence */ | ||
431 | if (push_seq && push_seq < cil->xc_ctx->sequence) | ||
473 | goto out_skip; | 432 | goto out_skip; |
474 | 433 | ||
475 | /* | 434 | /* |
@@ -515,6 +474,13 @@ xlog_cil_push( | |||
515 | cil->xc_ctx = new_ctx; | 474 | cil->xc_ctx = new_ctx; |
516 | 475 | ||
517 | /* | 476 | /* |
477 | * mirror the new sequence into the cil structure so that we can do | ||
478 | * unlocked checks against the current sequence in log forces without | ||
479 | risking dereferencing a freed context pointer. | ||
480 | */ | ||
481 | cil->xc_current_sequence = new_ctx->sequence; | ||
482 | |||
483 | /* | ||
518 | * The switch is now done, so we can drop the context lock and move out | 484 | * The switch is now done, so we can drop the context lock and move out |
519 | * of a shared context. We can't just go straight to the commit record, | 485 | * of a shared context. We can't just go straight to the commit record, |
520 | * though - we need to synchronise with previous and future commits so | 486 | * though - we need to synchronise with previous and future commits so |
@@ -626,6 +592,102 @@ out_abort: | |||
626 | } | 592 | } |
627 | 593 | ||
628 | /* | 594 | /* |
595 | * Commit a transaction with the given vector to the Committed Item List. | ||
596 | * | ||
597 | * To do this, we need to format the item, pin it in memory if required and | ||
598 | * account for the space used by the transaction. Once we have done that we | ||
599 | * need to release the unused reservation for the transaction, attach the | ||
600 | * transaction to the checkpoint context so we carry the busy extents through | ||
601 | * to checkpoint completion, and then unlock all the items in the transaction. | ||
602 | * | ||
603 | * For more specific information about the order of operations in | ||
604 | * xfs_log_commit_cil() please refer to the comments in | ||
605 | * xfs_trans_commit_iclog(). | ||
606 | * | ||
607 | * Called with the context lock already held in read mode to lock out | ||
608 | * background commit, returns without it held once background commits are | ||
609 | * allowed again. | ||
610 | */ | ||
611 | int | ||
612 | xfs_log_commit_cil( | ||
613 | struct xfs_mount *mp, | ||
614 | struct xfs_trans *tp, | ||
615 | struct xfs_log_vec *log_vector, | ||
616 | xfs_lsn_t *commit_lsn, | ||
617 | int flags) | ||
618 | { | ||
619 | struct log *log = mp->m_log; | ||
620 | int log_flags = 0; | ||
621 | int push = 0; | ||
622 | |||
623 | if (flags & XFS_TRANS_RELEASE_LOG_RES) | ||
624 | log_flags = XFS_LOG_REL_PERM_RESERV; | ||
625 | |||
626 | if (XLOG_FORCED_SHUTDOWN(log)) { | ||
627 | xlog_cil_free_logvec(log_vector); | ||
628 | return XFS_ERROR(EIO); | ||
629 | } | ||
630 | |||
631 | /* | ||
632 | * do all the hard work of formatting items (including memory | ||
633 | * allocation) outside the CIL context lock. This prevents stalling CIL | ||
634 | * pushes when we are low on memory and a transaction commit spends a | ||
635 | * lot of time in memory reclaim. | ||
636 | */ | ||
637 | xlog_cil_format_items(log, log_vector); | ||
638 | |||
639 | /* lock out background commit */ | ||
640 | down_read(&log->l_cilp->xc_ctx_lock); | ||
641 | xlog_cil_insert_items(log, log_vector, tp->t_ticket, commit_lsn); | ||
642 | |||
643 | /* check we didn't blow the reservation */ | ||
644 | if (tp->t_ticket->t_curr_res < 0) | ||
645 | xlog_print_tic_res(log->l_mp, tp->t_ticket); | ||
646 | |||
647 | /* attach the transaction to the CIL if it has any busy extents */ | ||
648 | if (!list_empty(&tp->t_busy)) { | ||
649 | spin_lock(&log->l_cilp->xc_cil_lock); | ||
650 | list_splice_init(&tp->t_busy, | ||
651 | &log->l_cilp->xc_ctx->busy_extents); | ||
652 | spin_unlock(&log->l_cilp->xc_cil_lock); | ||
653 | } | ||
654 | |||
655 | tp->t_commit_lsn = *commit_lsn; | ||
656 | xfs_log_done(mp, tp->t_ticket, NULL, log_flags); | ||
657 | xfs_trans_unreserve_and_mod_sb(tp); | ||
658 | |||
659 | /* | ||
660 | * Once all the items of the transaction have been copied to the CIL, | ||
661 | * the items can be unlocked and freed. | ||
662 | * | ||
663 | * This needs to be done before we drop the CIL context lock because we | ||
664 | * have to update state in the log items and unlock them before they go | ||
665 | * to disk. If we don't, then the CIL checkpoint can race with us and | ||
666 | * we can run checkpoint completion before we've updated and unlocked | ||
667 | * the log items. This affects (at least) processing of stale buffers, | ||
668 | * inodes and EFIs. | ||
669 | */ | ||
670 | xfs_trans_free_items(tp, *commit_lsn, 0); | ||
671 | |||
672 | /* check for background commit before unlock */ | ||
673 | if (log->l_cilp->xc_ctx->space_used > XLOG_CIL_SPACE_LIMIT(log)) | ||
674 | push = 1; | ||
675 | |||
676 | up_read(&log->l_cilp->xc_ctx_lock); | ||
677 | |||
678 | /* | ||
679 | * We need to push CIL every so often so we don't cache more than we | ||
680 | * can fit in the log. The limit really is that a checkpoint can't be | ||
681 | * more than half the log (the current checkpoint is not allowed to | ||
682 | * overwrite the previous checkpoint), but commit latency and memory | ||
683 | * usage limit this to a smaller size in most cases. | ||
684 | */ | ||
685 | if (push) | ||
686 | xlog_cil_push(log, 0); | ||
687 | return 0; | ||
688 | } | ||
689 | |||
690 | /* | ||
629 | * Conditionally push the CIL based on the sequence passed in. | 691 | * Conditionally push the CIL based on the sequence passed in. |
630 | * | 692 | * |
631 | * We only need to push if we haven't already pushed the sequence | 693 | * We only need to push if we haven't already pushed the sequence |
@@ -639,39 +701,34 @@ out_abort: | |||
639 | * commit lsn is there. It'll be empty, so this is broken for now. | 701 | * commit lsn is there. It'll be empty, so this is broken for now. |
640 | */ | 702 | */ |
641 | xfs_lsn_t | 703 | xfs_lsn_t |
642 | xlog_cil_push_lsn( | 704 | xlog_cil_force_lsn( |
643 | struct log *log, | 705 | struct log *log, |
644 | xfs_lsn_t push_seq) | 706 | xfs_lsn_t sequence) |
645 | { | 707 | { |
646 | struct xfs_cil *cil = log->l_cilp; | 708 | struct xfs_cil *cil = log->l_cilp; |
647 | struct xfs_cil_ctx *ctx; | 709 | struct xfs_cil_ctx *ctx; |
648 | xfs_lsn_t commit_lsn = NULLCOMMITLSN; | 710 | xfs_lsn_t commit_lsn = NULLCOMMITLSN; |
649 | 711 | ||
650 | restart: | 712 | ASSERT(sequence <= cil->xc_current_sequence); |
651 | down_write(&cil->xc_ctx_lock); | 713 | |
652 | ASSERT(push_seq <= cil->xc_ctx->sequence); | 714 | /* |
653 | 715 | * check to see if we need to force out the current context. | |
654 | /* check to see if we need to force out the current context */ | 716 | * xlog_cil_push() handles racing pushes for the same sequence, |
655 | if (push_seq == cil->xc_ctx->sequence) { | 717 | * so no need to deal with it here. |
656 | up_write(&cil->xc_ctx_lock); | 718 | */ |
657 | xlog_cil_push(log, 1); | 719 | if (sequence == cil->xc_current_sequence) |
658 | goto restart; | 720 | xlog_cil_push(log, sequence); |
659 | } | ||
660 | 721 | ||
661 | /* | 722 | /* |
662 | * See if we can find a previous sequence still committing. | 723 | * See if we can find a previous sequence still committing. |
663 | * We can drop the flush lock as soon as we have the cil lock | ||
664 | * because we are now only comparing contexts protected by | ||
665 | * the cil lock. | ||
666 | * | ||
667 | * We need to wait for all previous sequence commits to complete | 724 | * We need to wait for all previous sequence commits to complete |
668 | * before allowing the force of push_seq to go ahead. Hence block | 725 | * before allowing the force of push_seq to go ahead. Hence block |
669 | * on commits for those as well. | 726 | * on commits for those as well. |
670 | */ | 727 | */ |
728 | restart: | ||
671 | spin_lock(&cil->xc_cil_lock); | 729 | spin_lock(&cil->xc_cil_lock); |
672 | up_write(&cil->xc_ctx_lock); | ||
673 | list_for_each_entry(ctx, &cil->xc_committing, committing) { | 730 | list_for_each_entry(ctx, &cil->xc_committing, committing) { |
674 | if (ctx->sequence > push_seq) | 731 | if (ctx->sequence > sequence) |
675 | continue; | 732 | continue; |
676 | if (!ctx->commit_lsn) { | 733 | if (!ctx->commit_lsn) { |
677 | /* | 734 | /* |
@@ -681,7 +738,7 @@ restart: | |||
681 | sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0); | 738 | sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0); |
682 | goto restart; | 739 | goto restart; |
683 | } | 740 | } |
684 | if (ctx->sequence != push_seq) | 741 | if (ctx->sequence != sequence) |
685 | continue; | 742 | continue; |
686 | /* found it! */ | 743 | /* found it! */ |
687 | commit_lsn = ctx->commit_lsn; | 744 | commit_lsn = ctx->commit_lsn; |