Diffstat (limited to 'fs/xfs/xfs_log_cil.c')
-rw-r--r--   fs/xfs/xfs_log_cil.c   271
1 files changed, 164 insertions, 107 deletions
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 31e4ea2d19ac..7e206fc1fa36 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -68,6 +68,7 @@ xlog_cil_init(
         ctx->sequence = 1;
         ctx->cil = cil;
         cil->xc_ctx = ctx;
+        cil->xc_current_sequence = ctx->sequence;
 
         cil->xc_log = log;
         log->l_cilp = cil;
@@ -269,15 +270,10 @@ xlog_cil_insert(
 static void
 xlog_cil_format_items(
         struct log *log,
-        struct xfs_log_vec *log_vector,
-        struct xlog_ticket *ticket,
-        xfs_lsn_t *start_lsn)
+        struct xfs_log_vec *log_vector)
 {
         struct xfs_log_vec *lv;
 
-        if (start_lsn)
-                *start_lsn = log->l_cilp->xc_ctx->sequence;
-
         ASSERT(log_vector);
         for (lv = log_vector; lv; lv = lv->lv_next) {
                 void *ptr;
@@ -301,9 +297,24 @@ xlog_cil_format_items(
                         ptr += vec->i_len;
                 }
                 ASSERT(ptr == lv->lv_buf + lv->lv_buf_len);
+        }
+}
 
+static void
+xlog_cil_insert_items(
+        struct log *log,
+        struct xfs_log_vec *log_vector,
+        struct xlog_ticket *ticket,
+        xfs_lsn_t *start_lsn)
+{
+        struct xfs_log_vec *lv;
+
+        if (start_lsn)
+                *start_lsn = log->l_cilp->xc_ctx->sequence;
+
+        ASSERT(log_vector);
+        for (lv = log_vector; lv; lv = lv->lv_next)
                 xlog_cil_insert(log, ticket, lv->lv_item, lv);
-        }
 }
 
 static void
@@ -321,80 +332,6 @@ xlog_cil_free_logvec(
 }
 
 /*
- * Commit a transaction with the given vector to the Committed Item List.
- *
- * To do this, we need to format the item, pin it in memory if required and
- * account for the space used by the transaction. Once we have done that we
- * need to release the unused reservation for the transaction, attach the
- * transaction to the checkpoint context so we carry the busy extents through
- * to checkpoint completion, and then unlock all the items in the transaction.
- *
- * For more specific information about the order of operations in
- * xfs_log_commit_cil() please refer to the comments in
- * xfs_trans_commit_iclog().
- *
- * Called with the context lock already held in read mode to lock out
- * background commit, returns without it held once background commits are
- * allowed again.
- */
-int
-xfs_log_commit_cil(
-        struct xfs_mount *mp,
-        struct xfs_trans *tp,
-        struct xfs_log_vec *log_vector,
-        xfs_lsn_t *commit_lsn,
-        int flags)
-{
-        struct log *log = mp->m_log;
-        int log_flags = 0;
-        int push = 0;
-
-        if (flags & XFS_TRANS_RELEASE_LOG_RES)
-                log_flags = XFS_LOG_REL_PERM_RESERV;
-
-        if (XLOG_FORCED_SHUTDOWN(log)) {
-                xlog_cil_free_logvec(log_vector);
-                return XFS_ERROR(EIO);
-        }
-
-        /* lock out background commit */
-        down_read(&log->l_cilp->xc_ctx_lock);
-        xlog_cil_format_items(log, log_vector, tp->t_ticket, commit_lsn);
-
-        /* check we didn't blow the reservation */
-        if (tp->t_ticket->t_curr_res < 0)
-                xlog_print_tic_res(log->l_mp, tp->t_ticket);
-
-        /* attach the transaction to the CIL if it has any busy extents */
-        if (!list_empty(&tp->t_busy)) {
-                spin_lock(&log->l_cilp->xc_cil_lock);
-                list_splice_init(&tp->t_busy,
-                                        &log->l_cilp->xc_ctx->busy_extents);
-                spin_unlock(&log->l_cilp->xc_cil_lock);
-        }
-
-        tp->t_commit_lsn = *commit_lsn;
-        xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
-        xfs_trans_unreserve_and_mod_sb(tp);
-
-        /* check for background commit before unlock */
-        if (log->l_cilp->xc_ctx->space_used > XLOG_CIL_SPACE_LIMIT(log))
-                push = 1;
-        up_read(&log->l_cilp->xc_ctx_lock);
-
-        /*
-         * We need to push CIL every so often so we don't cache more than we
-         * can fit in the log. The limit really is that a checkpoint can't be
-         * more than half the log (the current checkpoint is not allowed to
-         * overwrite the previous checkpoint), but commit latency and memory
-         * usage limit this to a smaller size in most cases.
-         */
-        if (push)
-                xlog_cil_push(log, 0);
-        return 0;
-}
-
-/*
  * Mark all items committed and clear busy extents. We free the log vector
  * chains in a separate pass so that we unpin the log items as quickly as
  * possible.
@@ -427,13 +364,23 @@ xlog_cil_committed(
 }
 
 /*
- * Push the Committed Item List to the log. If the push_now flag is not set,
- * then it is a background flush and so we can chose to ignore it.
+ * Push the Committed Item List to the log. If @push_seq is zero, then it
+ * is a background flush and so we can choose to ignore it. Otherwise, if the
+ * current sequence is the same as @push_seq we need to do a flush. If
+ * @push_seq is less than the current sequence, then it has already been
+ * flushed and we don't need to do anything - the caller will wait for it to
+ * complete if necessary.
+ *
+ * @push_seq is a value rather than a flag because that allows us to do an
+ * unlocked check of the sequence number for a match. Hence we can allow log
+ * forces to run racily and not issue pushes for the same sequence twice. If we
+ * get a race between multiple pushes for the same sequence they will block on
+ * the first one and then abort, hence avoiding needless pushes.
  */
-int
+STATIC int
 xlog_cil_push(
         struct log *log,
-        int push_now)
+        xfs_lsn_t push_seq)
 {
         struct xfs_cil *cil = log->l_cilp;
         struct xfs_log_vec *lv;
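The @push_seq convention described in the comment above boils down to a three-way decision at the call site. A minimal standalone sketch of that decision follows; cil_model and should_push are made-up illustrative names, not XFS symbols, and the numbers in main() are arbitrary.

/*
 * Standalone sketch (not the kernel code) of the @push_seq convention:
 * 0 means background push, the current sequence means "force it out",
 * and anything older has already been pushed.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct cil_model {
        uint64_t current_sequence;      /* sequence of the open context */
};

/* 0 requests a background push; any other value targets that sequence. */
static bool should_push(const struct cil_model *cil, uint64_t push_seq,
                        size_t space_used, size_t space_limit)
{
        if (push_seq == 0)                      /* background flush ...       */
                return space_used >= space_limit; /* ... only if it is worth it */
        if (push_seq < cil->current_sequence)   /* already pushed, nothing to do */
                return false;
        return push_seq == cil->current_sequence; /* force this sequence out */
}

int main(void)
{
        struct cil_model cil = { .current_sequence = 42 };

        printf("background, small CIL: %d\n", should_push(&cil, 0, 100, 4096));
        printf("force current seq:     %d\n", should_push(&cil, 42, 100, 4096));
        printf("force old seq:         %d\n", should_push(&cil, 41, 100, 4096));
        return 0;
}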
@@ -453,12 +400,20 @@ xlog_cil_push(
         if (!cil)
                 return 0;
 
+        ASSERT(!push_seq || push_seq <= cil->xc_ctx->sequence);
+
         new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS);
         new_ctx->ticket = xlog_cil_ticket_alloc(log);
 
-        /* lock out transaction commit, but don't block on background push */
+        /*
+         * Lock out transaction commit, but don't block for background pushes
+         * unless we are well over the CIL space limit. See the definition of
+         * XLOG_CIL_HARD_SPACE_LIMIT() for the full explanation of the logic
+         * used here.
+         */
         if (!down_write_trylock(&cil->xc_ctx_lock)) {
-                if (!push_now)
+                if (!push_seq &&
+                    cil->xc_ctx->space_used < XLOG_CIL_HARD_SPACE_LIMIT(log))
                         goto out_free_ticket;
                 down_write(&cil->xc_ctx_lock);
         }
@@ -469,7 +424,11 @@ xlog_cil_push(
                 goto out_skip;
 
         /* check for spurious background flush */
-        if (!push_now && cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log))
+        if (!push_seq && cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log))
+                goto out_skip;
+
+        /* check for a previously pushed sequence */
+        if (push_seq && push_seq < cil->xc_ctx->sequence)
                 goto out_skip;
 
         /*
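Taken together, the previous two hunks give background pushes a two-threshold policy: give up if the trylock fails and the CIL is still under the hard limit, skip if it is under the soft limit, otherwise push. A standalone sketch of that policy; the limit values in main() are made up, since the real XLOG_CIL_SPACE_LIMIT()/XLOG_CIL_HARD_SPACE_LIMIT() are derived from the log size.

/*
 * Sketch of the soft/hard space-limit policy for background pushes,
 * reduced to a pure decision function. Not the kernel implementation.
 */
#include <stdbool.h>
#include <stdio.h>

enum bg_push_action {
        BG_GIVE_UP,     /* lost the trylock race and the CIL is still small */
        BG_BLOCK,       /* CIL is over the hard limit: wait for the lock */
        BG_SKIP,        /* got the lock but there is too little to checkpoint */
        BG_PUSH,        /* got the lock and the soft limit is exceeded */
};

static enum bg_push_action
background_push_action(bool got_trylock, unsigned long space_used,
                       unsigned long soft_limit, unsigned long hard_limit)
{
        if (!got_trylock)
                return space_used < hard_limit ? BG_GIVE_UP : BG_BLOCK;
        if (space_used < soft_limit)
                return BG_SKIP;         /* spurious background flush */
        return BG_PUSH;
}

int main(void)
{
        /* 256k soft / 1M hard limits are invented numbers for illustration. */
        printf("%d\n", background_push_action(false, 300 << 10, 256 << 10, 1 << 20));
        printf("%d\n", background_push_action(true,  300 << 10, 256 << 10, 1 << 20));
        return 0;
}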
@@ -515,6 +474,13 @@ xlog_cil_push(
         cil->xc_ctx = new_ctx;
 
         /*
+         * mirror the new sequence into the cil structure so that we can do
+         * unlocked checks against the current sequence in log forces without
+         * risking dereferencing a freed context pointer.
+         */
+        cil->xc_current_sequence = new_ctx->sequence;
+
+        /*
          * The switch is now done, so we can drop the context lock and move out
          * of a shared context. We can't just go straight to the commit record,
          * though - we need to synchronise with previous and future commits so
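The mirrored xc_current_sequence is what makes the unlocked checks in the log force path safe: readers compare against a field in the long-lived CIL structure and never dereference the context, which a push may free underneath them. A standalone sketch of the same pattern using C11 atomics (the kernel relies on its own locking rules rather than <stdatomic.h>); every name here is an illustrative stand-in.

/*
 * Sketch: keep a lockless-readable mirror of the current sequence so
 * readers never touch the short-lived context object.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

struct cil_ctx_model {          /* swapped out and eventually freed by the pusher */
        uint64_t sequence;
};

struct cil_model {
        struct cil_ctx_model *ctx;              /* only touch under the lock */
        _Atomic uint64_t current_sequence;      /* safe to read without it */
};

/* Called by the pusher after installing a new context, with the lock held. */
static void cil_switch_context(struct cil_model *cil,
                               struct cil_ctx_model *new_ctx)
{
        cil->ctx = new_ctx;
        atomic_store(&cil->current_sequence, new_ctx->sequence);
}

/* Log-force path: unlocked check that never dereferences cil->ctx. */
static bool cil_needs_push(struct cil_model *cil, uint64_t sequence)
{
        return sequence == atomic_load(&cil->current_sequence);
}

int main(void)
{
        static struct cil_ctx_model first = { .sequence = 1 };
        static struct cil_ctx_model second = { .sequence = 2 };
        struct cil_model cil = { .ctx = &first };

        atomic_init(&cil.current_sequence, first.sequence);
        cil_switch_context(&cil, &second);
        return cil_needs_push(&cil, 2) ? 0 : 1; /* expect a push for seq 2 */
}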
@@ -626,6 +592,102 @@ out_abort:
 }
 
 /*
+ * Commit a transaction with the given vector to the Committed Item List.
+ *
+ * To do this, we need to format the item, pin it in memory if required and
+ * account for the space used by the transaction. Once we have done that we
+ * need to release the unused reservation for the transaction, attach the
+ * transaction to the checkpoint context so we carry the busy extents through
+ * to checkpoint completion, and then unlock all the items in the transaction.
+ *
+ * For more specific information about the order of operations in
+ * xfs_log_commit_cil() please refer to the comments in
+ * xfs_trans_commit_iclog().
+ *
+ * Called with the context lock already held in read mode to lock out
+ * background commit, returns without it held once background commits are
+ * allowed again.
+ */
+int
+xfs_log_commit_cil(
+        struct xfs_mount *mp,
+        struct xfs_trans *tp,
+        struct xfs_log_vec *log_vector,
+        xfs_lsn_t *commit_lsn,
+        int flags)
+{
+        struct log *log = mp->m_log;
+        int log_flags = 0;
+        int push = 0;
+
+        if (flags & XFS_TRANS_RELEASE_LOG_RES)
+                log_flags = XFS_LOG_REL_PERM_RESERV;
+
+        if (XLOG_FORCED_SHUTDOWN(log)) {
+                xlog_cil_free_logvec(log_vector);
+                return XFS_ERROR(EIO);
+        }
+
+        /*
+         * do all the hard work of formatting items (including memory
+         * allocation) outside the CIL context lock. This prevents stalling CIL
+         * pushes when we are low on memory and a transaction commit spends a
+         * lot of time in memory reclaim.
+         */
+        xlog_cil_format_items(log, log_vector);
+
+        /* lock out background commit */
+        down_read(&log->l_cilp->xc_ctx_lock);
+        xlog_cil_insert_items(log, log_vector, tp->t_ticket, commit_lsn);
+
+        /* check we didn't blow the reservation */
+        if (tp->t_ticket->t_curr_res < 0)
+                xlog_print_tic_res(log->l_mp, tp->t_ticket);
+
+        /* attach the transaction to the CIL if it has any busy extents */
+        if (!list_empty(&tp->t_busy)) {
+                spin_lock(&log->l_cilp->xc_cil_lock);
+                list_splice_init(&tp->t_busy,
+                                        &log->l_cilp->xc_ctx->busy_extents);
+                spin_unlock(&log->l_cilp->xc_cil_lock);
+        }
+
+        tp->t_commit_lsn = *commit_lsn;
+        xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
+        xfs_trans_unreserve_and_mod_sb(tp);
+
+        /*
+         * Once all the items of the transaction have been copied to the CIL,
+         * the items can be unlocked and freed.
+         *
+         * This needs to be done before we drop the CIL context lock because we
+         * have to update state in the log items and unlock them before they go
+         * to disk. If we don't, then the CIL checkpoint can race with us and
+         * we can run checkpoint completion before we've updated and unlocked
+         * the log items. This affects (at least) processing of stale buffers,
+         * inodes and EFIs.
+         */
+        xfs_trans_free_items(tp, *commit_lsn, 0);
+
+        /* check for background commit before unlock */
+        if (log->l_cilp->xc_ctx->space_used > XLOG_CIL_SPACE_LIMIT(log))
+                push = 1;
+
+        up_read(&log->l_cilp->xc_ctx_lock);
+
+        /*
+         * We need to push CIL every so often so we don't cache more than we
+         * can fit in the log. The limit really is that a checkpoint can't be
+         * more than half the log (the current checkpoint is not allowed to
+         * overwrite the previous checkpoint), but commit latency and memory
+         * usage limit this to a smaller size in most cases.
+         */
+        if (push)
+                xlog_cil_push(log, 0);
+        return 0;
+}
+
+/*
  * Conditionally push the CIL based on the sequence passed in.
  *
  * We only need to push if we haven't already pushed the sequence
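The ordering the comments in the new xfs_log_commit_cil() insist on, i.e. format outside the context lock, insert and free the items while holding it in read mode, and only push after dropping it, is easier to see with the XFS internals stubbed out. A sketch under those assumptions, with a pthread rwlock standing in for xc_ctx_lock and an invented space limit:

/*
 * Sketch of the commit-path ordering with the XFS internals replaced by
 * stubs. The point is where each step sits relative to the CIL context
 * rwlock, not the real implementations.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>

static pthread_rwlock_t ctx_lock = PTHREAD_RWLOCK_INITIALIZER;
static size_t cil_space_used;
static const size_t cil_space_limit = 64 * 1024;        /* placeholder value */

static void format_items(void)          { /* memory allocation, copies */ }
static void insert_items(void)          { cil_space_used += 512; /* account space */ }
static void free_and_unlock_items(void) { /* update and unlock log items */ }
static void push_cil_background(void)   { /* background checkpoint push */ }

static void commit_to_cil(void)
{
        bool push;

        /* 1. Format (and allocate) outside the lock: may stall in reclaim. */
        format_items();

        /* 2. Insert and account space with background pushes locked out. */
        pthread_rwlock_rdlock(&ctx_lock);
        insert_items();

        /* 3. Unlock/free items before dropping the lock, so a checkpoint
         *    cannot complete against items we have not yet unlocked.      */
        free_and_unlock_items();

        /* 4. Sample the push decision before dropping the lock. */
        push = cil_space_used > cil_space_limit;
        pthread_rwlock_unlock(&ctx_lock);

        /* 5. Kick a background push outside the lock if the CIL is large. */
        if (push)
                push_cil_background();
}

int main(void)
{
        commit_to_cil();
        return 0;
}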
@@ -639,39 +701,34 @@ out_abort:
  * commit lsn is there. It'll be empty, so this is broken for now.
  */
 xfs_lsn_t
-xlog_cil_push_lsn(
+xlog_cil_force_lsn(
         struct log *log,
-        xfs_lsn_t push_seq)
+        xfs_lsn_t sequence)
 {
         struct xfs_cil *cil = log->l_cilp;
         struct xfs_cil_ctx *ctx;
         xfs_lsn_t commit_lsn = NULLCOMMITLSN;
 
-restart:
-        down_write(&cil->xc_ctx_lock);
-        ASSERT(push_seq <= cil->xc_ctx->sequence);
-
-        /* check to see if we need to force out the current context */
-        if (push_seq == cil->xc_ctx->sequence) {
-                up_write(&cil->xc_ctx_lock);
-                xlog_cil_push(log, 1);
-                goto restart;
-        }
+        ASSERT(sequence <= cil->xc_current_sequence);
+
+        /*
+         * check to see if we need to force out the current context.
+         * xlog_cil_push() handles racing pushes for the same sequence,
+         * so no need to deal with it here.
+         */
+        if (sequence == cil->xc_current_sequence)
+                xlog_cil_push(log, sequence);
 
         /*
          * See if we can find a previous sequence still committing.
-         * We can drop the flush lock as soon as we have the cil lock
-         * because we are now only comparing contexts protected by
-         * the cil lock.
-         *
          * We need to wait for all previous sequence commits to complete
          * before allowing the force of push_seq to go ahead. Hence block
          * on commits for those as well.
          */
+restart:
         spin_lock(&cil->xc_cil_lock);
-        up_write(&cil->xc_ctx_lock);
         list_for_each_entry(ctx, &cil->xc_committing, committing) {
-                if (ctx->sequence > push_seq)
+                if (ctx->sequence > sequence)
                         continue;
                 if (!ctx->commit_lsn) {
                         /*
@@ -681,7 +738,7 @@ restart:
                         sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0);
                         goto restart;
                 }
-                if (ctx->sequence != push_seq)
+                if (ctx->sequence != sequence)
                         continue;
                 /* found it! */
                 commit_lsn = ctx->commit_lsn;
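For reference, the wait loop that xlog_cil_force_lsn() ends with (scan xc_committing, sleep on xc_commit_wait whenever a sequence at or below the target has no commit record yet, then rescan) can be modelled in isolation. A sketch with a pthread mutex/condvar standing in for xc_cil_lock and sv_wait(); the list handling and names are simplified stand-ins, not the kernel code.

/*
 * Sketch of the "wait for earlier commit records" loop. Only the control
 * flow is modelled; the pusher side that sets commit_lsn and broadcasts
 * the condvar is omitted.
 */
#include <pthread.h>
#include <stdint.h>

#define NULLCOMMITLSN   0

struct committing_ctx {
        uint64_t                sequence;
        uint64_t                commit_lsn;     /* 0 until the record is written */
        struct committing_ctx   *next;
};

static pthread_mutex_t cil_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t commit_wait = PTHREAD_COND_INITIALIZER;
static struct committing_ctx *committing_list;

static uint64_t wait_for_commit_lsn(uint64_t sequence)
{
        uint64_t commit_lsn = NULLCOMMITLSN;
        struct committing_ctx *ctx;

        pthread_mutex_lock(&cil_lock);
restart:
        for (ctx = committing_list; ctx; ctx = ctx->next) {
                if (ctx->sequence > sequence)
                        continue;
                if (ctx->commit_lsn == NULLCOMMITLSN) {
                        /* An earlier (or our own) checkpoint has not written
                         * its commit record yet: sleep, then rescan. */
                        pthread_cond_wait(&commit_wait, &cil_lock);
                        goto restart;
                }
                if (ctx->sequence != sequence)
                        continue;
                commit_lsn = ctx->commit_lsn;   /* found it! */
        }
        pthread_mutex_unlock(&cil_lock);
        return commit_lsn;
}

int main(void)
{
        struct committing_ctx done = { .sequence = 7, .commit_lsn = 123 };

        committing_list = &done;
        return wait_for_commit_lsn(7) == 123 ? 0 : 1;
}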
