aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.c2
-rw-r--r--fs/xfs/xfs_log.c67
-rw-r--r--fs/xfs/xfs_log_priv.h12
3 files changed, 39 insertions, 42 deletions
diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c
index 5cad3274db02..b52528bbbfff 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.c
+++ b/fs/xfs/linux-2.6/xfs_vnode.c
@@ -33,7 +33,7 @@
33 33
34 34
35/* 35/*
36 * Dedicated vnode inactive/reclaim sync semaphores. 36 * Dedicated vnode inactive/reclaim sync wait queues.
37 * Prime number of hash buckets since address is used as the key. 37 * Prime number of hash buckets since address is used as the key.
38 */ 38 */
39#define NVSYNC 37 39#define NVSYNC 37
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 91b00a5686cd..0816c5d6d76b 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -357,11 +357,11 @@ xfs_log_done(xfs_mount_t *mp,
357 * Asynchronous forces are implemented by setting the WANT_SYNC 357 * Asynchronous forces are implemented by setting the WANT_SYNC
358 * bit in the appropriate in-core log and then returning. 358 * bit in the appropriate in-core log and then returning.
359 * 359 *
360 * Synchronous forces are implemented with a semaphore. All callers 360 * Synchronous forces are implemented with a signal variable. All callers
361 * to force a given lsn to disk will wait on a semaphore attached to the 361 * to force a given lsn to disk will wait on the sv attached to the
362 * specific in-core log. When given in-core log finally completes its 362 * specific in-core log. When given in-core log finally completes its
363 * write to disk, that thread will wake up all threads waiting on the 363 * write to disk, that thread will wake up all threads waiting on the
364 * semaphore. 364 * sv.
365 */ 365 */
366int 366int
367_xfs_log_force( 367_xfs_log_force(
@@ -707,7 +707,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
707 if (!(iclog->ic_state == XLOG_STATE_ACTIVE || 707 if (!(iclog->ic_state == XLOG_STATE_ACTIVE ||
708 iclog->ic_state == XLOG_STATE_DIRTY)) { 708 iclog->ic_state == XLOG_STATE_DIRTY)) {
709 if (!XLOG_FORCED_SHUTDOWN(log)) { 709 if (!XLOG_FORCED_SHUTDOWN(log)) {
710 sv_wait(&iclog->ic_forcesema, PMEM, 710 sv_wait(&iclog->ic_force_wait, PMEM,
711 &log->l_icloglock, s); 711 &log->l_icloglock, s);
712 } else { 712 } else {
713 spin_unlock(&log->l_icloglock); 713 spin_unlock(&log->l_icloglock);
@@ -748,7 +748,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
748 || iclog->ic_state == XLOG_STATE_DIRTY 748 || iclog->ic_state == XLOG_STATE_DIRTY
749 || iclog->ic_state == XLOG_STATE_IOERROR) ) { 749 || iclog->ic_state == XLOG_STATE_IOERROR) ) {
750 750
751 sv_wait(&iclog->ic_forcesema, PMEM, 751 sv_wait(&iclog->ic_force_wait, PMEM,
752 &log->l_icloglock, s); 752 &log->l_icloglock, s);
753 } else { 753 } else {
754 spin_unlock(&log->l_icloglock); 754 spin_unlock(&log->l_icloglock);
@@ -838,7 +838,7 @@ xfs_log_move_tail(xfs_mount_t *mp,
838 break; 838 break;
839 tail_lsn = 0; 839 tail_lsn = 0;
840 free_bytes -= tic->t_unit_res; 840 free_bytes -= tic->t_unit_res;
841 sv_signal(&tic->t_sema); 841 sv_signal(&tic->t_wait);
842 tic = tic->t_next; 842 tic = tic->t_next;
843 } while (tic != log->l_write_headq); 843 } while (tic != log->l_write_headq);
844 } 844 }
@@ -859,7 +859,7 @@ xfs_log_move_tail(xfs_mount_t *mp,
859 break; 859 break;
860 tail_lsn = 0; 860 tail_lsn = 0;
861 free_bytes -= need_bytes; 861 free_bytes -= need_bytes;
862 sv_signal(&tic->t_sema); 862 sv_signal(&tic->t_wait);
863 tic = tic->t_next; 863 tic = tic->t_next;
864 } while (tic != log->l_reserve_headq); 864 } while (tic != log->l_reserve_headq);
865 } 865 }
@@ -1285,8 +1285,8 @@ xlog_alloc_log(xfs_mount_t *mp,
1285 1285
1286 ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp)); 1286 ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp));
1287 ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0); 1287 ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0);
1288 sv_init(&iclog->ic_forcesema, SV_DEFAULT, "iclog-force"); 1288 sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force");
1289 sv_init(&iclog->ic_writesema, SV_DEFAULT, "iclog-write"); 1289 sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write");
1290 1290
1291 iclogp = &iclog->ic_next; 1291 iclogp = &iclog->ic_next;
1292 } 1292 }
@@ -1565,8 +1565,8 @@ xlog_dealloc_log(xlog_t *log)
1565 1565
1566 iclog = log->l_iclog; 1566 iclog = log->l_iclog;
1567 for (i=0; i<log->l_iclog_bufs; i++) { 1567 for (i=0; i<log->l_iclog_bufs; i++) {
1568 sv_destroy(&iclog->ic_forcesema); 1568 sv_destroy(&iclog->ic_force_wait);
1569 sv_destroy(&iclog->ic_writesema); 1569 sv_destroy(&iclog->ic_write_wait);
1570 xfs_buf_free(iclog->ic_bp); 1570 xfs_buf_free(iclog->ic_bp);
1571#ifdef XFS_LOG_TRACE 1571#ifdef XFS_LOG_TRACE
1572 if (iclog->ic_trace != NULL) { 1572 if (iclog->ic_trace != NULL) {
@@ -1976,7 +1976,7 @@ xlog_write(xfs_mount_t * mp,
1976/* Clean iclogs starting from the head. This ordering must be 1976/* Clean iclogs starting from the head. This ordering must be
1977 * maintained, so an iclog doesn't become ACTIVE beyond one that 1977 * maintained, so an iclog doesn't become ACTIVE beyond one that
1978 * is SYNCING. This is also required to maintain the notion that we use 1978 * is SYNCING. This is also required to maintain the notion that we use
1979 * a counting semaphore to hold off would be writers to the log when every 1979 * an ordered wait queue to hold off would be writers to the log when every
1980 * iclog is trying to sync to disk. 1980 * iclog is trying to sync to disk.
1981 * 1981 *
1982 * State Change: DIRTY -> ACTIVE 1982 * State Change: DIRTY -> ACTIVE
@@ -2240,7 +2240,7 @@ xlog_state_do_callback(
2240 xlog_state_clean_log(log); 2240 xlog_state_clean_log(log);
2241 2241
2242 /* wake up threads waiting in xfs_log_force() */ 2242 /* wake up threads waiting in xfs_log_force() */
2243 sv_broadcast(&iclog->ic_forcesema); 2243 sv_broadcast(&iclog->ic_force_wait);
2244 2244
2245 iclog = iclog->ic_next; 2245 iclog = iclog->ic_next;
2246 } while (first_iclog != iclog); 2246 } while (first_iclog != iclog);
@@ -2302,8 +2302,7 @@ xlog_state_do_callback(
2302 * the second completion goes through. 2302 * the second completion goes through.
2303 * 2303 *
2304 * Callbacks could take time, so they are done outside the scope of the 2304 * Callbacks could take time, so they are done outside the scope of the
2305 * global state machine log lock. Assume that the calls to cvsema won't 2305 * global state machine log lock.
2306 * take a long time. At least we know it won't sleep.
2307 */ 2306 */
2308STATIC void 2307STATIC void
2309xlog_state_done_syncing( 2308xlog_state_done_syncing(
@@ -2339,7 +2338,7 @@ xlog_state_done_syncing(
2339 * iclog buffer, we wake them all, one will get to do the 2338 * iclog buffer, we wake them all, one will get to do the
2340 * I/O, the others get to wait for the result. 2339 * I/O, the others get to wait for the result.
2341 */ 2340 */
2342 sv_broadcast(&iclog->ic_writesema); 2341 sv_broadcast(&iclog->ic_write_wait);
2343 spin_unlock(&log->l_icloglock); 2342 spin_unlock(&log->l_icloglock);
2344 xlog_state_do_callback(log, aborted, iclog); /* also cleans log */ 2343 xlog_state_do_callback(log, aborted, iclog); /* also cleans log */
2345} /* xlog_state_done_syncing */ 2344} /* xlog_state_done_syncing */
@@ -2347,11 +2346,9 @@ xlog_state_done_syncing(
2347 2346
2348/* 2347/*
2349 * If the head of the in-core log ring is not (ACTIVE or DIRTY), then we must 2348 * If the head of the in-core log ring is not (ACTIVE or DIRTY), then we must
2350 * sleep. The flush semaphore is set to the number of in-core buffers and 2349 * sleep. We wait on the flush queue on the head iclog as that should be
2351 * decremented around disk syncing. Therefore, if all buffers are syncing, 2350 * the first iclog to complete flushing. Hence if all iclogs are syncing,
2352 * this semaphore will cause new writes to sleep until a sync completes. 2351 * we will wait here and all new writes will sleep until a sync completes.
2353 * Otherwise, this code just does p() followed by v(). This approximates
2354 * a sleep/wakeup except we can't race.
2355 * 2352 *
2356 * The in-core logs are used in a circular fashion. They are not used 2353 * The in-core logs are used in a circular fashion. They are not used
2357 * out-of-order even when an iclog past the head is free. 2354 * out-of-order even when an iclog past the head is free.
@@ -2508,7 +2505,7 @@ xlog_grant_log_space(xlog_t *log,
2508 goto error_return; 2505 goto error_return;
2509 2506
2510 XFS_STATS_INC(xs_sleep_logspace); 2507 XFS_STATS_INC(xs_sleep_logspace);
2511 sv_wait(&tic->t_sema, PINOD|PLTWAIT, &log->l_grant_lock, s); 2508 sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
2512 /* 2509 /*
2513 * If we got an error, and the filesystem is shutting down, 2510 * If we got an error, and the filesystem is shutting down,
2514 * we'll catch it down below. So just continue... 2511 * we'll catch it down below. So just continue...
@@ -2534,7 +2531,7 @@ redo:
2534 xlog_trace_loggrant(log, tic, 2531 xlog_trace_loggrant(log, tic,
2535 "xlog_grant_log_space: sleep 2"); 2532 "xlog_grant_log_space: sleep 2");
2536 XFS_STATS_INC(xs_sleep_logspace); 2533 XFS_STATS_INC(xs_sleep_logspace);
2537 sv_wait(&tic->t_sema, PINOD|PLTWAIT, &log->l_grant_lock, s); 2534 sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
2538 2535
2539 if (XLOG_FORCED_SHUTDOWN(log)) { 2536 if (XLOG_FORCED_SHUTDOWN(log)) {
2540 spin_lock(&log->l_grant_lock); 2537 spin_lock(&log->l_grant_lock);
@@ -2633,7 +2630,7 @@ xlog_regrant_write_log_space(xlog_t *log,
2633 if (free_bytes < ntic->t_unit_res) 2630 if (free_bytes < ntic->t_unit_res)
2634 break; 2631 break;
2635 free_bytes -= ntic->t_unit_res; 2632 free_bytes -= ntic->t_unit_res;
2636 sv_signal(&ntic->t_sema); 2633 sv_signal(&ntic->t_wait);
2637 ntic = ntic->t_next; 2634 ntic = ntic->t_next;
2638 } while (ntic != log->l_write_headq); 2635 } while (ntic != log->l_write_headq);
2639 2636
@@ -2644,7 +2641,7 @@ xlog_regrant_write_log_space(xlog_t *log,
2644 xlog_trace_loggrant(log, tic, 2641 xlog_trace_loggrant(log, tic,
2645 "xlog_regrant_write_log_space: sleep 1"); 2642 "xlog_regrant_write_log_space: sleep 1");
2646 XFS_STATS_INC(xs_sleep_logspace); 2643 XFS_STATS_INC(xs_sleep_logspace);
2647 sv_wait(&tic->t_sema, PINOD|PLTWAIT, 2644 sv_wait(&tic->t_wait, PINOD|PLTWAIT,
2648 &log->l_grant_lock, s); 2645 &log->l_grant_lock, s);
2649 2646
2650 /* If we're shutting down, this tic is already 2647 /* If we're shutting down, this tic is already
@@ -2673,7 +2670,7 @@ redo:
2673 if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) 2670 if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
2674 xlog_ins_ticketq(&log->l_write_headq, tic); 2671 xlog_ins_ticketq(&log->l_write_headq, tic);
2675 XFS_STATS_INC(xs_sleep_logspace); 2672 XFS_STATS_INC(xs_sleep_logspace);
2676 sv_wait(&tic->t_sema, PINOD|PLTWAIT, &log->l_grant_lock, s); 2673 sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
2677 2674
2678 /* If we're shutting down, this tic is already off the queue */ 2675 /* If we're shutting down, this tic is already off the queue */
2679 if (XLOG_FORCED_SHUTDOWN(log)) { 2676 if (XLOG_FORCED_SHUTDOWN(log)) {
@@ -2916,7 +2913,7 @@ xlog_state_switch_iclogs(xlog_t *log,
2916 * 2. the current iclog is dirty, and the previous iclog is in the 2913 * 2. the current iclog is dirty, and the previous iclog is in the
2917 * active or dirty state. 2914 * active or dirty state.
2918 * 2915 *
2919 * We may sleep (call psema) if: 2916 * We may sleep if:
2920 * 2917 *
2921 * 1. the current iclog is not in the active nor dirty state. 2918 * 1. the current iclog is not in the active nor dirty state.
2922 * 2. the current iclog is dirty, and the previous iclog is not in the 2919 * 2. the current iclog is dirty, and the previous iclog is not in the
@@ -3013,7 +3010,7 @@ maybe_sleep:
3013 return XFS_ERROR(EIO); 3010 return XFS_ERROR(EIO);
3014 } 3011 }
3015 XFS_STATS_INC(xs_log_force_sleep); 3012 XFS_STATS_INC(xs_log_force_sleep);
3016 sv_wait(&iclog->ic_forcesema, PINOD, &log->l_icloglock, s); 3013 sv_wait(&iclog->ic_force_wait, PINOD, &log->l_icloglock, s);
3017 /* 3014 /*
3018 * No need to grab the log lock here since we're 3015 * No need to grab the log lock here since we're
3019 * only deciding whether or not to return EIO 3016 * only deciding whether or not to return EIO
@@ -3096,7 +3093,7 @@ try_again:
3096 XLOG_STATE_SYNCING))) { 3093 XLOG_STATE_SYNCING))) {
3097 ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR)); 3094 ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR));
3098 XFS_STATS_INC(xs_log_force_sleep); 3095 XFS_STATS_INC(xs_log_force_sleep);
3099 sv_wait(&iclog->ic_prev->ic_writesema, PSWP, 3096 sv_wait(&iclog->ic_prev->ic_write_wait, PSWP,
3100 &log->l_icloglock, s); 3097 &log->l_icloglock, s);
3101 *log_flushed = 1; 3098 *log_flushed = 1;
3102 already_slept = 1; 3099 already_slept = 1;
@@ -3116,7 +3113,7 @@ try_again:
3116 !(iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) { 3113 !(iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) {
3117 3114
3118 /* 3115 /*
3119 * Don't wait on the forcesema if we know that we've 3116 * Don't wait on completion if we know that we've
3120 * gotten a log write error. 3117 * gotten a log write error.
3121 */ 3118 */
3122 if (iclog->ic_state & XLOG_STATE_IOERROR) { 3119 if (iclog->ic_state & XLOG_STATE_IOERROR) {
@@ -3124,7 +3121,7 @@ try_again:
3124 return XFS_ERROR(EIO); 3121 return XFS_ERROR(EIO);
3125 } 3122 }
3126 XFS_STATS_INC(xs_log_force_sleep); 3123 XFS_STATS_INC(xs_log_force_sleep);
3127 sv_wait(&iclog->ic_forcesema, PSWP, &log->l_icloglock, s); 3124 sv_wait(&iclog->ic_force_wait, PSWP, &log->l_icloglock, s);
3128 /* 3125 /*
3129 * No need to grab the log lock here since we're 3126 * No need to grab the log lock here since we're
3130 * only deciding whether or not to return EIO 3127 * only deciding whether or not to return EIO
@@ -3180,7 +3177,7 @@ STATIC void
3180xlog_ticket_put(xlog_t *log, 3177xlog_ticket_put(xlog_t *log,
3181 xlog_ticket_t *ticket) 3178 xlog_ticket_t *ticket)
3182{ 3179{
3183 sv_destroy(&ticket->t_sema); 3180 sv_destroy(&ticket->t_wait);
3184 kmem_zone_free(xfs_log_ticket_zone, ticket); 3181 kmem_zone_free(xfs_log_ticket_zone, ticket);
3185} /* xlog_ticket_put */ 3182} /* xlog_ticket_put */
3186 3183
@@ -3270,7 +3267,7 @@ xlog_ticket_get(xlog_t *log,
3270 tic->t_trans_type = 0; 3267 tic->t_trans_type = 0;
3271 if (xflags & XFS_LOG_PERM_RESERV) 3268 if (xflags & XFS_LOG_PERM_RESERV)
3272 tic->t_flags |= XLOG_TIC_PERM_RESERV; 3269 tic->t_flags |= XLOG_TIC_PERM_RESERV;
3273 sv_init(&(tic->t_sema), SV_DEFAULT, "logtick"); 3270 sv_init(&(tic->t_wait), SV_DEFAULT, "logtick");
3274 3271
3275 xlog_tic_reset_res(tic); 3272 xlog_tic_reset_res(tic);
3276 3273
@@ -3557,14 +3554,14 @@ xfs_log_force_umount(
3557 */ 3554 */
3558 if ((tic = log->l_reserve_headq)) { 3555 if ((tic = log->l_reserve_headq)) {
3559 do { 3556 do {
3560 sv_signal(&tic->t_sema); 3557 sv_signal(&tic->t_wait);
3561 tic = tic->t_next; 3558 tic = tic->t_next;
3562 } while (tic != log->l_reserve_headq); 3559 } while (tic != log->l_reserve_headq);
3563 } 3560 }
3564 3561
3565 if ((tic = log->l_write_headq)) { 3562 if ((tic = log->l_write_headq)) {
3566 do { 3563 do {
3567 sv_signal(&tic->t_sema); 3564 sv_signal(&tic->t_wait);
3568 tic = tic->t_next; 3565 tic = tic->t_next;
3569 } while (tic != log->l_write_headq); 3566 } while (tic != log->l_write_headq);
3570 } 3567 }
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 6245913196b4..7dcf11e0b2f0 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -241,7 +241,7 @@ typedef struct xlog_res {
241} xlog_res_t; 241} xlog_res_t;
242 242
243typedef struct xlog_ticket { 243typedef struct xlog_ticket {
244 sv_t t_sema; /* sleep on this semaphore : 20 */ 244 sv_t t_wait; /* ticket wait queue : 20 */
245 struct xlog_ticket *t_next; /* :4|8 */ 245 struct xlog_ticket *t_next; /* :4|8 */
246 struct xlog_ticket *t_prev; /* :4|8 */ 246 struct xlog_ticket *t_prev; /* :4|8 */
247 xlog_tid_t t_tid; /* transaction identifier : 4 */ 247 xlog_tid_t t_tid; /* transaction identifier : 4 */
@@ -314,7 +314,7 @@ typedef struct xlog_rec_ext_header {
314 * xlog_rec_header_t into the reserved space. 314 * xlog_rec_header_t into the reserved space.
315 * - ic_data follows, so a write to disk can start at the beginning of 315 * - ic_data follows, so a write to disk can start at the beginning of
316 * the iclog. 316 * the iclog.
317 * - ic_forcesema is used to implement synchronous forcing of the iclog to disk. 317 * - ic_force_wait is used to implement synchronous forcing of the iclog to disk.
318 * - ic_next is the pointer to the next iclog in the ring. 318 * - ic_next is the pointer to the next iclog in the ring.
319 * - ic_bp is a pointer to the buffer used to write this incore log to disk. 319 * - ic_bp is a pointer to the buffer used to write this incore log to disk.
320 * - ic_log is a pointer back to the global log structure. 320 * - ic_log is a pointer back to the global log structure.
@@ -339,8 +339,8 @@ typedef struct xlog_rec_ext_header {
339 * and move everything else out to subsequent cachelines. 339 * and move everything else out to subsequent cachelines.
340 */ 340 */
341typedef struct xlog_iclog_fields { 341typedef struct xlog_iclog_fields {
342 sv_t ic_forcesema; 342 sv_t ic_force_wait;
343 sv_t ic_writesema; 343 sv_t ic_write_wait;
344 struct xlog_in_core *ic_next; 344 struct xlog_in_core *ic_next;
345 struct xlog_in_core *ic_prev; 345 struct xlog_in_core *ic_prev;
346 struct xfs_buf *ic_bp; 346 struct xfs_buf *ic_bp;
@@ -377,8 +377,8 @@ typedef struct xlog_in_core {
377/* 377/*
378 * Defines to save our code from this glop. 378 * Defines to save our code from this glop.
379 */ 379 */
380#define ic_forcesema hic_fields.ic_forcesema 380#define ic_force_wait hic_fields.ic_force_wait
381#define ic_writesema hic_fields.ic_writesema 381#define ic_write_wait hic_fields.ic_write_wait
382#define ic_next hic_fields.ic_next 382#define ic_next hic_fields.ic_next
383#define ic_prev hic_fields.ic_prev 383#define ic_prev hic_fields.ic_prev
384#define ic_bp hic_fields.ic_bp 384#define ic_bp hic_fields.ic_bp