author     Dave Chinner <dchinner@redhat.com>   2010-12-20 20:29:01 -0500
committer  Dave Chinner <david@fromorbit.com>   2010-12-20 20:29:01 -0500
commit     3f16b9850743b702380f098ab5e0308cd6af1792
tree       6a39db2c6b750fab92e16865383f733e48aa96ed   /fs/xfs/xfs_log.c
parent     c8a09ff8ca2235bccdaea8a52fbd5349646a8ba4
xfs: introduce new locks for the log grant ticket wait queues
The log grant ticket wait queues are currently protected by the log grant lock. However, the queues are functionally independent from each other, and operations on them only require serialisation against other queue operations now that all of the other log variables they use are atomic values.

Hence, we can make them independent of the grant lock by introducing new locks just to protect the list operations. Because the lists are independent, we can use a lock per list and ensure that reserve and write head queuing do not contend.

To ensure forced shutdowns work correctly in conjunction with the new fast paths, ensure that we check whether the log has been shut down in the grant functions once we hold the relevant spin locks but before we go to sleep. This is needed to co-ordinate correctly with the wakeups that are issued on the ticket queues so we don't leave any processes sleeping on the queues during a shutdown.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
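To make the locking pattern concrete before reading the diff, here is a minimal userspace sketch of the shape the patch gives both grant paths: an unlocked emptiness check on the fast path, a recheck under the per-queue lock, and a shutdown test made after the lock is taken but before sleeping. pthreads stand in for the kernel's spinlocks and ticket wait queues, and every name below is illustrative rather than taken from the XFS code.

/*
 * Sketch only: pthreads stand in for the kernel's per-queue spinlock and
 * ticket wait queue. All names are illustrative, not XFS's.
 */
#include <pthread.h>
#include <stdbool.h>

struct grant_queue {
	pthread_mutex_t lock;     /* plays the role of l_grant_reserve_lock */
	pthread_cond_t  wait;     /* plays the role of the ticket wait queue */
	int             nwaiters; /* non-zero iff someone is queued */
	bool            shutdown; /* plays the role of XLOG_FORCED_SHUTDOWN() */
};

/* Returns 0 once space is available, -1 if the log has been shut down. */
static int grant_space(struct grant_queue *q, bool (*have_space)(void))
{
	/*
	 * Lock-free fast path: if nobody is queued ahead of us and space is
	 * available, we never touch q->lock. The unlocked peek at nwaiters
	 * is safe for the same reason the patch's list_empty_careful() check
	 * is: queueing and dequeueing only ever happen under q->lock, and a
	 * stale answer merely sends us down the slow path.
	 */
	if (q->nwaiters == 0 && have_space())
		return 0;

	pthread_mutex_lock(&q->lock);
	while (!have_space()) {
		/*
		 * Test for shutdown after taking the lock but before
		 * sleeping. A shutdown broadcast is issued under the same
		 * lock, so a waiter that checked the flag here can never
		 * sleep through it.
		 */
		if (q->shutdown) {
			pthread_mutex_unlock(&q->lock);
			return -1;
		}
		q->nwaiters++;
		pthread_cond_wait(&q->wait, &q->lock); /* drops q->lock while asleep */
		q->nwaiters--;
	}
	pthread_mutex_unlock(&q->lock);
	return 0;
}

/* Wake side: whoever frees space wakes the waiters under the queue lock. */
static void space_was_freed(struct grant_queue *q)
{
	pthread_mutex_lock(&q->lock);
	pthread_cond_broadcast(&q->wait);
	pthread_mutex_unlock(&q->lock);
}

/* Shutdown side: set the flag and wake everyone, all under the queue lock. */
static void shut_down(struct grant_queue *q)
{
	pthread_mutex_lock(&q->lock);
	q->shutdown = true;
	pthread_cond_broadcast(&q->wait);
	pthread_mutex_unlock(&q->lock);
}

The design point is the one the commit message makes: waiters are only ever queued and dequeued under the per-queue lock, so the hot reservation path pays for a lock only when it actually has to sleep.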
Diffstat (limited to 'fs/xfs/xfs_log.c')
-rw-r--r--   fs/xfs/xfs_log.c   139
1 file changed, 83 insertions, 56 deletions
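One primitive the new fast paths lean on is list_empty_careful(). Unlike plain list_empty(), it tests both the next and prev pointers of the list head, so an unlocked caller is less likely to be fooled by a list in mid-update; it is still only a hint unless stronger guarantees hold, which is why both grant functions recheck under the spin lock after taking it. For reference, the 2.6-era helper in include/linux/list.h is essentially the following (quoted from memory, so treat it as a paraphrase rather than the exact kernel source):

static inline int list_empty_careful(const struct list_head *head)
{
	struct list_head *next = head->next;
	return (next == head) && (next == head->prev);
}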
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index a1d7d12fc51f..6fcc9d0af524 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -682,12 +682,12 @@ xfs_log_move_tail(xfs_mount_t *mp,
 	if (tail_lsn != 1)
 		atomic64_set(&log->l_tail_lsn, tail_lsn);
 
-	spin_lock(&log->l_grant_lock);
-	if (!list_empty(&log->l_writeq)) {
+	if (!list_empty_careful(&log->l_writeq)) {
 #ifdef DEBUG
 		if (log->l_flags & XLOG_ACTIVE_RECOVERY)
 			panic("Recovery problem");
 #endif
+		spin_lock(&log->l_grant_write_lock);
 		free_bytes = xlog_space_left(log, &log->l_grant_write_head);
 		list_for_each_entry(tic, &log->l_writeq, t_queue) {
 			ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV);
@@ -696,15 +696,18 @@ xfs_log_move_tail(xfs_mount_t *mp,
 				break;
 			tail_lsn = 0;
 			free_bytes -= tic->t_unit_res;
+			trace_xfs_log_regrant_write_wake_up(log, tic);
 			wake_up(&tic->t_wait);
 		}
+		spin_unlock(&log->l_grant_write_lock);
 	}
 
-	if (!list_empty(&log->l_reserveq)) {
+	if (!list_empty_careful(&log->l_reserveq)) {
 #ifdef DEBUG
 		if (log->l_flags & XLOG_ACTIVE_RECOVERY)
 			panic("Recovery problem");
 #endif
+		spin_lock(&log->l_grant_reserve_lock);
 		free_bytes = xlog_space_left(log, &log->l_grant_reserve_head);
 		list_for_each_entry(tic, &log->l_reserveq, t_queue) {
 			if (tic->t_flags & XLOG_TIC_PERM_RESERV)
@@ -715,11 +718,12 @@ xfs_log_move_tail(xfs_mount_t *mp,
 				break;
 			tail_lsn = 0;
 			free_bytes -= need_bytes;
+			trace_xfs_log_grant_wake_up(log, tic);
 			wake_up(&tic->t_wait);
 		}
+		spin_unlock(&log->l_grant_reserve_lock);
 	}
-	spin_unlock(&log->l_grant_lock);
-}	/* xfs_log_move_tail */
+}
 
 /*
  * Determine if we have a transaction that has gone to disk
@@ -1010,6 +1014,8 @@ xlog_alloc_log(xfs_mount_t *mp,
 	xlog_assign_grant_head(&log->l_grant_write_head, 1, 0);
 	INIT_LIST_HEAD(&log->l_reserveq);
 	INIT_LIST_HEAD(&log->l_writeq);
+	spin_lock_init(&log->l_grant_reserve_lock);
+	spin_lock_init(&log->l_grant_write_lock);
 
 	error = EFSCORRUPTED;
 	if (xfs_sb_version_hassector(&mp->m_sb)) {
@@ -2477,6 +2483,18 @@ restart:
  *
  * Once a ticket gets put onto the reserveq, it will only return after
  * the needed reservation is satisfied.
+ *
+ * This function is structured so that it has a lock free fast path. This is
+ * necessary because every new transaction reservation will come through this
+ * path. Hence any lock will be globally hot if we take it unconditionally on
+ * every pass.
+ *
+ * As tickets are only ever moved on and off the reserveq under the
+ * l_grant_reserve_lock, we only need to take that lock if we are going
+ * to add the ticket to the queue and sleep. We can avoid taking the lock if the
+ * ticket was never added to the reserveq because the t_queue list head will be
+ * empty and we hold the only reference to it so it can safely be checked
+ * unlocked.
  */
 STATIC int
 xlog_grant_log_space(xlog_t	*log,
@@ -2490,13 +2508,20 @@ xlog_grant_log_space(xlog_t *log,
 	panic("grant Recovery problem");
 #endif
 
-	/* Is there space or do we need to sleep? */
-	spin_lock(&log->l_grant_lock);
-
 	trace_xfs_log_grant_enter(log, tic);
 
+	need_bytes = tic->t_unit_res;
+	if (tic->t_flags & XFS_LOG_PERM_RESERV)
+		need_bytes *= tic->t_ocnt;
+
 	/* something is already sleeping; insert new transaction at end */
-	if (!list_empty(&log->l_reserveq)) {
+	if (!list_empty_careful(&log->l_reserveq)) {
+		spin_lock(&log->l_grant_reserve_lock);
+		/* recheck the queue now we are locked */
+		if (list_empty(&log->l_reserveq)) {
+			spin_unlock(&log->l_grant_reserve_lock);
+			goto redo;
+		}
 		list_add_tail(&tic->t_queue, &log->l_reserveq);
 
 		trace_xfs_log_grant_sleep1(log, tic);
@@ -2509,48 +2534,47 @@ xlog_grant_log_space(xlog_t *log,
 			goto error_return;
 
 		XFS_STATS_INC(xs_sleep_logspace);
-		xlog_wait(&tic->t_wait, &log->l_grant_lock);
+		xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock);
 
 		/*
 		 * If we got an error, and the filesystem is shutting down,
 		 * we'll catch it down below. So just continue...
 		 */
 		trace_xfs_log_grant_wake1(log, tic);
-		spin_lock(&log->l_grant_lock);
 	}
-	if (tic->t_flags & XFS_LOG_PERM_RESERV)
-		need_bytes = tic->t_unit_res*tic->t_ocnt;
-	else
-		need_bytes = tic->t_unit_res;
 
 redo:
 	if (XLOG_FORCED_SHUTDOWN(log))
-		goto error_return;
+		goto error_return_unlocked;
 
 	free_bytes = xlog_space_left(log, &log->l_grant_reserve_head);
 	if (free_bytes < need_bytes) {
+		spin_lock(&log->l_grant_reserve_lock);
 		if (list_empty(&tic->t_queue))
 			list_add_tail(&tic->t_queue, &log->l_reserveq);
 
 		trace_xfs_log_grant_sleep2(log, tic);
 
+		if (XLOG_FORCED_SHUTDOWN(log))
+			goto error_return;
+
 		xlog_grant_push_ail(log, need_bytes);
 
 		XFS_STATS_INC(xs_sleep_logspace);
-		xlog_wait(&tic->t_wait, &log->l_grant_lock);
-
-		spin_lock(&log->l_grant_lock);
-		if (XLOG_FORCED_SHUTDOWN(log))
-			goto error_return;
+		xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock);
 
 		trace_xfs_log_grant_wake2(log, tic);
-
 		goto redo;
 	}
 
-	list_del_init(&tic->t_queue);
+	if (!list_empty(&tic->t_queue)) {
+		spin_lock(&log->l_grant_reserve_lock);
+		list_del_init(&tic->t_queue);
+		spin_unlock(&log->l_grant_reserve_lock);
+	}
 
 	/* we've got enough space */
+	spin_lock(&log->l_grant_lock);
 	xlog_grant_add_space(log, &log->l_grant_reserve_head, need_bytes);
 	xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes);
 	trace_xfs_log_grant_exit(log, tic);
@@ -2559,8 +2583,11 @@ redo:
 	spin_unlock(&log->l_grant_lock);
 	return 0;
 
- error_return:
+error_return_unlocked:
+	spin_lock(&log->l_grant_reserve_lock);
+error_return:
 	list_del_init(&tic->t_queue);
+	spin_unlock(&log->l_grant_reserve_lock);
 	trace_xfs_log_grant_error(log, tic);
 
 	/*
@@ -2570,7 +2597,6 @@ redo:
 	 */
 	tic->t_curr_res = 0;
 	tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */
-	spin_unlock(&log->l_grant_lock);
 	return XFS_ERROR(EIO);
 }	/* xlog_grant_log_space */
 
@@ -2578,7 +2604,8 @@ redo:
 /*
  * Replenish the byte reservation required by moving the grant write head.
  *
- *
+ * Similar to xlog_grant_log_space, the function is structured to have a lock
+ * free fast path.
  */
 STATIC int
 xlog_regrant_write_log_space(xlog_t	*log,
@@ -2597,12 +2624,9 @@ xlog_regrant_write_log_space(xlog_t *log,
 	panic("regrant Recovery problem");
 #endif
 
-	spin_lock(&log->l_grant_lock);
-
 	trace_xfs_log_regrant_write_enter(log, tic);
-
 	if (XLOG_FORCED_SHUTDOWN(log))
-		goto error_return;
+		goto error_return_unlocked;
 
 	/* If there are other waiters on the queue then give them a
 	 * chance at logspace before us. Wake up the first waiters,
@@ -2611,8 +2635,10 @@ xlog_regrant_write_log_space(xlog_t *log,
 	 * this transaction.
 	 */
 	need_bytes = tic->t_unit_res;
-	if (!list_empty(&log->l_writeq)) {
+	if (!list_empty_careful(&log->l_writeq)) {
 		struct xlog_ticket *ntic;
+
+		spin_lock(&log->l_grant_write_lock);
 		free_bytes = xlog_space_left(log, &log->l_grant_write_head);
 		list_for_each_entry(ntic, &log->l_writeq, t_queue) {
 			ASSERT(ntic->t_flags & XLOG_TIC_PERM_RESERV);
@@ -2627,50 +2653,48 @@ xlog_regrant_write_log_space(xlog_t *log,
 					struct xlog_ticket, t_queue)) {
 			if (list_empty(&tic->t_queue))
 				list_add_tail(&tic->t_queue, &log->l_writeq);
-
 			trace_xfs_log_regrant_write_sleep1(log, tic);
 
 			xlog_grant_push_ail(log, need_bytes);
 
 			XFS_STATS_INC(xs_sleep_logspace);
-			xlog_wait(&tic->t_wait, &log->l_grant_lock);
-
-			/* If we're shutting down, this tic is already
-			 * off the queue */
-			spin_lock(&log->l_grant_lock);
-			if (XLOG_FORCED_SHUTDOWN(log))
-				goto error_return;
-
+			xlog_wait(&tic->t_wait, &log->l_grant_write_lock);
 			trace_xfs_log_regrant_write_wake1(log, tic);
-		}
+		} else
+			spin_unlock(&log->l_grant_write_lock);
 	}
 
 redo:
 	if (XLOG_FORCED_SHUTDOWN(log))
-		goto error_return;
+		goto error_return_unlocked;
 
 	free_bytes = xlog_space_left(log, &log->l_grant_write_head);
 	if (free_bytes < need_bytes) {
+		spin_lock(&log->l_grant_write_lock);
 		if (list_empty(&tic->t_queue))
 			list_add_tail(&tic->t_queue, &log->l_writeq);
+
+		if (XLOG_FORCED_SHUTDOWN(log))
+			goto error_return;
+
 		xlog_grant_push_ail(log, need_bytes);
 
 		XFS_STATS_INC(xs_sleep_logspace);
 		trace_xfs_log_regrant_write_sleep2(log, tic);
-		xlog_wait(&tic->t_wait, &log->l_grant_lock);
-
-		/* If we're shutting down, this tic is already off the queue */
-		spin_lock(&log->l_grant_lock);
-		if (XLOG_FORCED_SHUTDOWN(log))
-			goto error_return;
+		xlog_wait(&tic->t_wait, &log->l_grant_write_lock);
 
 		trace_xfs_log_regrant_write_wake2(log, tic);
 		goto redo;
 	}
 
-	list_del_init(&tic->t_queue);
+	if (!list_empty(&tic->t_queue)) {
+		spin_lock(&log->l_grant_write_lock);
+		list_del_init(&tic->t_queue);
+		spin_unlock(&log->l_grant_write_lock);
+	}
 
 	/* we've got enough space */
+	spin_lock(&log->l_grant_lock);
 	xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes);
 	trace_xfs_log_regrant_write_exit(log, tic);
 	xlog_verify_grant_head(log, 1);
@@ -2679,8 +2703,11 @@ redo:
 	return 0;
 
 
+ error_return_unlocked:
+	spin_lock(&log->l_grant_write_lock);
  error_return:
 	list_del_init(&tic->t_queue);
+	spin_unlock(&log->l_grant_write_lock);
 	trace_xfs_log_regrant_write_error(log, tic);
 
 	/*
@@ -2690,7 +2717,6 @@ redo:
 	 */
 	tic->t_curr_res = 0;
 	tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */
-	spin_unlock(&log->l_grant_lock);
 	return XFS_ERROR(EIO);
 }	/* xlog_regrant_write_log_space */
 
@@ -3664,12 +3690,10 @@ xfs_log_force_umount(
 	xlog_cil_force(log);
 
 	/*
-	 * We must hold both the GRANT lock and the LOG lock,
-	 * before we mark the filesystem SHUTDOWN and wake
-	 * everybody up to tell the bad news.
+	 * mark the filesystem and the as in a shutdown state and wake
+	 * everybody up to tell them the bad news.
 	 */
 	spin_lock(&log->l_icloglock);
-	spin_lock(&log->l_grant_lock);
 	mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN;
 	if (mp->m_sb_bp)
 		XFS_BUF_DONE(mp->m_sb_bp);
@@ -3694,14 +3718,17 @@ xfs_log_force_umount(
 	 * means we have to wake up everybody queued up on reserveq as well as
 	 * writeq. In addition, we make sure in xlog_{re}grant_log_space that
 	 * we don't enqueue anything once the SHUTDOWN flag is set, and this
-	 * action is protected by the GRANTLOCK.
+	 * action is protected by the grant locks.
 	 */
+	spin_lock(&log->l_grant_reserve_lock);
 	list_for_each_entry(tic, &log->l_reserveq, t_queue)
 		wake_up(&tic->t_wait);
+	spin_unlock(&log->l_grant_reserve_lock);
 
+	spin_lock(&log->l_grant_write_lock);
 	list_for_each_entry(tic, &log->l_writeq, t_queue)
 		wake_up(&tic->t_wait);
-	spin_unlock(&log->l_grant_lock);
+	spin_unlock(&log->l_grant_write_lock);
 
 	if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) {
 		ASSERT(!logerror);