aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs
diff options
context:
space:
mode:
authorChristoph Hellwig <hch@infradead.org>2011-11-28 03:17:36 -0500
committerBen Myers <bpm@sgi.com>2011-12-06 15:19:47 -0500
commit9f9c19ec1a59422c7687b11847ed3408128aa0d6 (patch)
tree55a7dbf7422f4b18e32ba5e0f5378c959c4e12ba /fs/xfs
parentc29f7d457ac63311feb11928a866efd2fe153d74 (diff)
xfs: fix the logspace waiting algorithm
Apply the scheme used in log_regrant_write_log_space to wake up any other threads waiting for log space before the newly added one to log_regrant_write_log_space as well, and factor the code into readable helpers. For each of the queues we have add two helpers: - one to try to wake up all waiting threads. This helper will also be usable by xfs_log_move_tail once we remove the current opportunistic wakeups in it. - one to sleep on t_wait until enough log space is available, loosely modelled after Linux waitqueues. And use them to reimplement the guts of log_regrant_write_log_space and log_regrant_write_log_space. These two function now use one and the same algorithm for waiting on log space instead of subtly different ones before, with an option to completely unify them in the near future. Also move the filesystem shutdown handling to the common caller given that we had to touch it anyway. Based on hard debugging and an earlier patch from Chandra Seetharaman <sekharan@us.ibm.com>. Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Chandra Seetharaman <sekharan@us.ibm.com> Tested-by: Chandra Seetharaman <sekharan@us.ibm.com> Signed-off-by: Ben Myers <bpm@sgi.com>
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/xfs_log.c348
-rw-r--r--fs/xfs/xfs_trace.h12
2 files changed, 177 insertions, 183 deletions
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index a14cd89fe46..34817adf4b9 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -150,6 +150,117 @@ xlog_grant_add_space(
150 } while (head_val != old); 150 } while (head_val != old);
151} 151}
152 152
153STATIC bool
154xlog_reserveq_wake(
155 struct log *log,
156 int *free_bytes)
157{
158 struct xlog_ticket *tic;
159 int need_bytes;
160
161 list_for_each_entry(tic, &log->l_reserveq, t_queue) {
162 if (tic->t_flags & XLOG_TIC_PERM_RESERV)
163 need_bytes = tic->t_unit_res * tic->t_cnt;
164 else
165 need_bytes = tic->t_unit_res;
166
167 if (*free_bytes < need_bytes)
168 return false;
169 *free_bytes -= need_bytes;
170
171 trace_xfs_log_grant_wake_up(log, tic);
172 wake_up(&tic->t_wait);
173 }
174
175 return true;
176}
177
178STATIC bool
179xlog_writeq_wake(
180 struct log *log,
181 int *free_bytes)
182{
183 struct xlog_ticket *tic;
184 int need_bytes;
185
186 list_for_each_entry(tic, &log->l_writeq, t_queue) {
187 ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV);
188
189 need_bytes = tic->t_unit_res;
190
191 if (*free_bytes < need_bytes)
192 return false;
193 *free_bytes -= need_bytes;
194
195 trace_xfs_log_regrant_write_wake_up(log, tic);
196 wake_up(&tic->t_wait);
197 }
198
199 return true;
200}
201
202STATIC int
203xlog_reserveq_wait(
204 struct log *log,
205 struct xlog_ticket *tic,
206 int need_bytes)
207{
208 list_add_tail(&tic->t_queue, &log->l_reserveq);
209
210 do {
211 if (XLOG_FORCED_SHUTDOWN(log))
212 goto shutdown;
213 xlog_grant_push_ail(log, need_bytes);
214
215 XFS_STATS_INC(xs_sleep_logspace);
216 trace_xfs_log_grant_sleep(log, tic);
217
218 xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock);
219 trace_xfs_log_grant_wake(log, tic);
220
221 spin_lock(&log->l_grant_reserve_lock);
222 if (XLOG_FORCED_SHUTDOWN(log))
223 goto shutdown;
224 } while (xlog_space_left(log, &log->l_grant_reserve_head) < need_bytes);
225
226 list_del_init(&tic->t_queue);
227 return 0;
228shutdown:
229 list_del_init(&tic->t_queue);
230 return XFS_ERROR(EIO);
231}
232
233STATIC int
234xlog_writeq_wait(
235 struct log *log,
236 struct xlog_ticket *tic,
237 int need_bytes)
238{
239 list_add_tail(&tic->t_queue, &log->l_writeq);
240
241 do {
242 if (XLOG_FORCED_SHUTDOWN(log))
243 goto shutdown;
244 xlog_grant_push_ail(log, need_bytes);
245
246 XFS_STATS_INC(xs_sleep_logspace);
247 trace_xfs_log_regrant_write_sleep(log, tic);
248
249 xlog_wait(&tic->t_wait, &log->l_grant_write_lock);
250 trace_xfs_log_regrant_write_wake(log, tic);
251
252 spin_lock(&log->l_grant_write_lock);
253 if (XLOG_FORCED_SHUTDOWN(log))
254 goto shutdown;
255 } while (xlog_space_left(log, &log->l_grant_write_head) < need_bytes);
256
257 list_del_init(&tic->t_queue);
258 return 0;
259shutdown:
260 list_del_init(&tic->t_queue);
261 return XFS_ERROR(EIO);
262}
263
153static void 264static void
154xlog_tic_reset_res(xlog_ticket_t *tic) 265xlog_tic_reset_res(xlog_ticket_t *tic)
155{ 266{
@@ -350,8 +461,19 @@ xfs_log_reserve(
350 retval = xlog_grant_log_space(log, internal_ticket); 461 retval = xlog_grant_log_space(log, internal_ticket);
351 } 462 }
352 463
464 if (unlikely(retval)) {
465 /*
466 * If we are failing, make sure the ticket doesn't have any
467 * current reservations. We don't want to add this back
468 * when the ticket/ transaction gets cancelled.
469 */
470 internal_ticket->t_curr_res = 0;
471 /* ungrant will give back unit_res * t_cnt. */
472 internal_ticket->t_cnt = 0;
473 }
474
353 return retval; 475 return retval;
354} /* xfs_log_reserve */ 476}
355 477
356 478
357/* 479/*
@@ -2481,8 +2603,8 @@ restart:
2481/* 2603/*
2482 * Atomically get the log space required for a log ticket. 2604 * Atomically get the log space required for a log ticket.
2483 * 2605 *
2484 * Once a ticket gets put onto the reserveq, it will only return after 2606 * Once a ticket gets put onto the reserveq, it will only return after the
2485 * the needed reservation is satisfied. 2607 * needed reservation is satisfied.
2486 * 2608 *
2487 * This function is structured so that it has a lock free fast path. This is 2609 * This function is structured so that it has a lock free fast path. This is
2488 * necessary because every new transaction reservation will come through this 2610 * necessary because every new transaction reservation will come through this
@@ -2490,113 +2612,53 @@ restart:
2490 * every pass. 2612 * every pass.
2491 * 2613 *
2492 * As tickets are only ever moved on and off the reserveq under the 2614 * As tickets are only ever moved on and off the reserveq under the
2493 * l_grant_reserve_lock, we only need to take that lock if we are going 2615 * l_grant_reserve_lock, we only need to take that lock if we are going to add
2494 * to add the ticket to the queue and sleep. We can avoid taking the lock if the 2616 * the ticket to the queue and sleep. We can avoid taking the lock if the ticket
2495 * ticket was never added to the reserveq because the t_queue list head will be 2617 * was never added to the reserveq because the t_queue list head will be empty
2496 * empty and we hold the only reference to it so it can safely be checked 2618 * and we hold the only reference to it so it can safely be checked unlocked.
2497 * unlocked.
2498 */ 2619 */
2499STATIC int 2620STATIC int
2500xlog_grant_log_space(xlog_t *log, 2621xlog_grant_log_space(
2501 xlog_ticket_t *tic) 2622 struct log *log,
2623 struct xlog_ticket *tic)
2502{ 2624{
2503 int free_bytes; 2625 int free_bytes, need_bytes;
2504 int need_bytes; 2626 int error = 0;
2505 2627
2506#ifdef DEBUG 2628 ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY));
2507 if (log->l_flags & XLOG_ACTIVE_RECOVERY)
2508 panic("grant Recovery problem");
2509#endif
2510 2629
2511 trace_xfs_log_grant_enter(log, tic); 2630 trace_xfs_log_grant_enter(log, tic);
2512 2631
2632 /*
2633 * If there are other waiters on the queue then give them a chance at
2634 * logspace before us. Wake up the first waiters, if we do not wake
2635 * up all the waiters then go to sleep waiting for more free space,
2636 * otherwise try to get some space for this transaction.
2637 */
2513 need_bytes = tic->t_unit_res; 2638 need_bytes = tic->t_unit_res;
2514 if (tic->t_flags & XFS_LOG_PERM_RESERV) 2639 if (tic->t_flags & XFS_LOG_PERM_RESERV)
2515 need_bytes *= tic->t_ocnt; 2640 need_bytes *= tic->t_ocnt;
2516
2517 /* something is already sleeping; insert new transaction at end */
2518 if (!list_empty_careful(&log->l_reserveq)) {
2519 spin_lock(&log->l_grant_reserve_lock);
2520 /* recheck the queue now we are locked */
2521 if (list_empty(&log->l_reserveq)) {
2522 spin_unlock(&log->l_grant_reserve_lock);
2523 goto redo;
2524 }
2525 list_add_tail(&tic->t_queue, &log->l_reserveq);
2526
2527 trace_xfs_log_grant_sleep1(log, tic);
2528
2529 /*
2530 * Gotta check this before going to sleep, while we're
2531 * holding the grant lock.
2532 */
2533 if (XLOG_FORCED_SHUTDOWN(log))
2534 goto error_return;
2535
2536 XFS_STATS_INC(xs_sleep_logspace);
2537 xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock);
2538
2539 /*
2540 * If we got an error, and the filesystem is shutting down,
2541 * we'll catch it down below. So just continue...
2542 */
2543 trace_xfs_log_grant_wake1(log, tic);
2544 }
2545
2546redo:
2547 if (XLOG_FORCED_SHUTDOWN(log))
2548 goto error_return_unlocked;
2549
2550 free_bytes = xlog_space_left(log, &log->l_grant_reserve_head); 2641 free_bytes = xlog_space_left(log, &log->l_grant_reserve_head);
2551 if (free_bytes < need_bytes) { 2642 if (!list_empty_careful(&log->l_reserveq)) {
2552 spin_lock(&log->l_grant_reserve_lock); 2643 spin_lock(&log->l_grant_reserve_lock);
2553 if (list_empty(&tic->t_queue)) 2644 if (!xlog_reserveq_wake(log, &free_bytes) ||
2554 list_add_tail(&tic->t_queue, &log->l_reserveq); 2645 free_bytes < need_bytes)
2555 2646 error = xlog_reserveq_wait(log, tic, need_bytes);
2556 trace_xfs_log_grant_sleep2(log, tic); 2647 spin_unlock(&log->l_grant_reserve_lock);
2557 2648 } else if (free_bytes < need_bytes) {
2558 if (XLOG_FORCED_SHUTDOWN(log))
2559 goto error_return;
2560
2561 xlog_grant_push_ail(log, need_bytes);
2562
2563 XFS_STATS_INC(xs_sleep_logspace);
2564 xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock);
2565
2566 trace_xfs_log_grant_wake2(log, tic);
2567 goto redo;
2568 }
2569
2570 if (!list_empty(&tic->t_queue)) {
2571 spin_lock(&log->l_grant_reserve_lock); 2649 spin_lock(&log->l_grant_reserve_lock);
2572 list_del_init(&tic->t_queue); 2650 error = xlog_reserveq_wait(log, tic, need_bytes);
2573 spin_unlock(&log->l_grant_reserve_lock); 2651 spin_unlock(&log->l_grant_reserve_lock);
2574 } 2652 }
2653 if (error)
2654 return error;
2575 2655
2576 /* we've got enough space */
2577 xlog_grant_add_space(log, &log->l_grant_reserve_head, need_bytes); 2656 xlog_grant_add_space(log, &log->l_grant_reserve_head, need_bytes);
2578 xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes); 2657 xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes);
2579 trace_xfs_log_grant_exit(log, tic); 2658 trace_xfs_log_grant_exit(log, tic);
2580 xlog_verify_grant_tail(log); 2659 xlog_verify_grant_tail(log);
2581 return 0; 2660 return 0;
2582 2661}
2583error_return_unlocked:
2584 spin_lock(&log->l_grant_reserve_lock);
2585error_return:
2586 list_del_init(&tic->t_queue);
2587 spin_unlock(&log->l_grant_reserve_lock);
2588 trace_xfs_log_grant_error(log, tic);
2589
2590 /*
2591 * If we are failing, make sure the ticket doesn't have any
2592 * current reservations. We don't want to add this back when
2593 * the ticket/transaction gets cancelled.
2594 */
2595 tic->t_curr_res = 0;
2596 tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */
2597 return XFS_ERROR(EIO);
2598} /* xlog_grant_log_space */
2599
2600 2662
2601/* 2663/*
2602 * Replenish the byte reservation required by moving the grant write head. 2664 * Replenish the byte reservation required by moving the grant write head.
@@ -2605,10 +2667,12 @@ error_return:
2605 * free fast path. 2667 * free fast path.
2606 */ 2668 */
2607STATIC int 2669STATIC int
2608xlog_regrant_write_log_space(xlog_t *log, 2670xlog_regrant_write_log_space(
2609 xlog_ticket_t *tic) 2671 struct log *log,
2672 struct xlog_ticket *tic)
2610{ 2673{
2611 int free_bytes, need_bytes; 2674 int free_bytes, need_bytes;
2675 int error = 0;
2612 2676
2613 tic->t_curr_res = tic->t_unit_res; 2677 tic->t_curr_res = tic->t_unit_res;
2614 xlog_tic_reset_res(tic); 2678 xlog_tic_reset_res(tic);
@@ -2616,104 +2680,38 @@ xlog_regrant_write_log_space(xlog_t *log,
2616 if (tic->t_cnt > 0) 2680 if (tic->t_cnt > 0)
2617 return 0; 2681 return 0;
2618 2682
2619#ifdef DEBUG 2683 ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY));
2620 if (log->l_flags & XLOG_ACTIVE_RECOVERY)
2621 panic("regrant Recovery problem");
2622#endif
2623 2684
2624 trace_xfs_log_regrant_write_enter(log, tic); 2685 trace_xfs_log_regrant_write_enter(log, tic);
2625 if (XLOG_FORCED_SHUTDOWN(log))
2626 goto error_return_unlocked;
2627 2686
2628 /* If there are other waiters on the queue then give them a 2687 /*
2629 * chance at logspace before us. Wake up the first waiters, 2688 * If there are other waiters on the queue then give them a chance at
2630 * if we do not wake up all the waiters then go to sleep waiting 2689 * logspace before us. Wake up the first waiters, if we do not wake
2631 * for more free space, otherwise try to get some space for 2690 * up all the waiters then go to sleep waiting for more free space,
2632 * this transaction. 2691 * otherwise try to get some space for this transaction.
2633 */ 2692 */
2634 need_bytes = tic->t_unit_res; 2693 need_bytes = tic->t_unit_res;
2635 if (!list_empty_careful(&log->l_writeq)) {
2636 struct xlog_ticket *ntic;
2637
2638 spin_lock(&log->l_grant_write_lock);
2639 free_bytes = xlog_space_left(log, &log->l_grant_write_head);
2640 list_for_each_entry(ntic, &log->l_writeq, t_queue) {
2641 ASSERT(ntic->t_flags & XLOG_TIC_PERM_RESERV);
2642
2643 if (free_bytes < ntic->t_unit_res)
2644 break;
2645 free_bytes -= ntic->t_unit_res;
2646 wake_up(&ntic->t_wait);
2647 }
2648
2649 if (ntic != list_first_entry(&log->l_writeq,
2650 struct xlog_ticket, t_queue)) {
2651 if (list_empty(&tic->t_queue))
2652 list_add_tail(&tic->t_queue, &log->l_writeq);
2653 trace_xfs_log_regrant_write_sleep1(log, tic);
2654
2655 xlog_grant_push_ail(log, need_bytes);
2656
2657 XFS_STATS_INC(xs_sleep_logspace);
2658 xlog_wait(&tic->t_wait, &log->l_grant_write_lock);
2659 trace_xfs_log_regrant_write_wake1(log, tic);
2660 } else
2661 spin_unlock(&log->l_grant_write_lock);
2662 }
2663
2664redo:
2665 if (XLOG_FORCED_SHUTDOWN(log))
2666 goto error_return_unlocked;
2667
2668 free_bytes = xlog_space_left(log, &log->l_grant_write_head); 2694 free_bytes = xlog_space_left(log, &log->l_grant_write_head);
2669 if (free_bytes < need_bytes) { 2695 if (!list_empty_careful(&log->l_writeq)) {
2670 spin_lock(&log->l_grant_write_lock); 2696 spin_lock(&log->l_grant_write_lock);
2671 if (list_empty(&tic->t_queue)) 2697 if (!xlog_writeq_wake(log, &free_bytes) ||
2672 list_add_tail(&tic->t_queue, &log->l_writeq); 2698 free_bytes < need_bytes)
2673 2699 error = xlog_writeq_wait(log, tic, need_bytes);
2674 if (XLOG_FORCED_SHUTDOWN(log)) 2700 spin_unlock(&log->l_grant_write_lock);
2675 goto error_return; 2701 } else if (free_bytes < need_bytes) {
2676
2677 xlog_grant_push_ail(log, need_bytes);
2678
2679 XFS_STATS_INC(xs_sleep_logspace);
2680 trace_xfs_log_regrant_write_sleep2(log, tic);
2681 xlog_wait(&tic->t_wait, &log->l_grant_write_lock);
2682
2683 trace_xfs_log_regrant_write_wake2(log, tic);
2684 goto redo;
2685 }
2686
2687 if (!list_empty(&tic->t_queue)) {
2688 spin_lock(&log->l_grant_write_lock); 2702 spin_lock(&log->l_grant_write_lock);
2689 list_del_init(&tic->t_queue); 2703 error = xlog_writeq_wait(log, tic, need_bytes);
2690 spin_unlock(&log->l_grant_write_lock); 2704 spin_unlock(&log->l_grant_write_lock);
2691 } 2705 }
2692 2706
2693 /* we've got enough space */ 2707 if (error)
2708 return error;
2709
2694 xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes); 2710 xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes);
2695 trace_xfs_log_regrant_write_exit(log, tic); 2711 trace_xfs_log_regrant_write_exit(log, tic);
2696 xlog_verify_grant_tail(log); 2712 xlog_verify_grant_tail(log);
2697 return 0; 2713 return 0;
2698 2714}
2699
2700 error_return_unlocked:
2701 spin_lock(&log->l_grant_write_lock);
2702 error_return:
2703 list_del_init(&tic->t_queue);
2704 spin_unlock(&log->l_grant_write_lock);
2705 trace_xfs_log_regrant_write_error(log, tic);
2706
2707 /*
2708 * If we are failing, make sure the ticket doesn't have any
2709 * current reservations. We don't want to add this back when
2710 * the ticket/transaction gets cancelled.
2711 */
2712 tic->t_curr_res = 0;
2713 tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */
2714 return XFS_ERROR(EIO);
2715} /* xlog_regrant_write_log_space */
2716
2717 2715
2718/* The first cnt-1 times through here we don't need to 2716/* The first cnt-1 times through here we don't need to
2719 * move the grant write head because the permanent 2717 * move the grant write head because the permanent
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index f1d2802b2f0..49403579887 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -834,18 +834,14 @@ DEFINE_LOGGRANT_EVENT(xfs_log_umount_write);
834DEFINE_LOGGRANT_EVENT(xfs_log_grant_enter); 834DEFINE_LOGGRANT_EVENT(xfs_log_grant_enter);
835DEFINE_LOGGRANT_EVENT(xfs_log_grant_exit); 835DEFINE_LOGGRANT_EVENT(xfs_log_grant_exit);
836DEFINE_LOGGRANT_EVENT(xfs_log_grant_error); 836DEFINE_LOGGRANT_EVENT(xfs_log_grant_error);
837DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep1); 837DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep);
838DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake1); 838DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake);
839DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep2);
840DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake2);
841DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake_up); 839DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake_up);
842DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter); 840DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter);
843DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit); 841DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit);
844DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error); 842DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error);
845DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep1); 843DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep);
846DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake1); 844DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake);
847DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep2);
848DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake2);
849DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake_up); 845DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake_up);
850DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter); 846DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter);
851DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit); 847DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit);