aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_log.c
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2008-08-13 18:17:49 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2008-08-13 18:17:49 -0400
commit: 9ea319b61613085f501a79cf8d405cb221d084f3 (patch)
tree: 5bf7e1b9f104a0df029d355927fa9eb398db37bb /fs/xfs/xfs_log.c
parent: 3e11acd4306d558249c31cf6cac09f218f2de52e (diff)
parent: c6a7b0f8a49aa71792dd108efc535435f462bf79 (diff)
Merge git://oss.sgi.com:8090/xfs/linux-2.6
* git://oss.sgi.com:8090/xfs/linux-2.6: (45 commits)
  [XFS] Fix use after free in xfs_log_done().
  [XFS] Make xfs_bmap_*_count_leaves void.
  [XFS] Use KM_NOFS for debug trace buffers
  [XFS] use KM_MAYFAIL in xfs_mountfs
  [XFS] refactor xfs_mount_free
  [XFS] don't call xfs_freesb from xfs_unmountfs
  [XFS] xfs_unmountfs should return void
  [XFS] cleanup xfs_mountfs
  [XFS] move root inode IRELE into xfs_unmountfs
  [XFS] stop using file_update_time
  [XFS] optimize xfs_ichgtime
  [XFS] update timestamp in xfs_ialloc manually
  [XFS] remove the sema_t from XFS.
  [XFS] replace dquot flush semaphore with a completion
  [XFS] replace inode flush semaphore with a completion
  [XFS] extend completions to provide XFS object flush requirements
  [XFS] replace the XFS buf iodone semaphore with a completion
  [XFS] clean up stale references to semaphores
  [XFS] use get_unaligned_* helpers
  [XFS] Fix compile failure in xfs_buf_trace()
  ...
Diffstat (limited to 'fs/xfs/xfs_log.c')
-rw-r--r-- fs/xfs/xfs_log.c | 86
1 files changed, 40 insertions, 46 deletions
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 91b00a5686cd..ccba14eb9dbe 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -160,7 +160,7 @@ void
160xlog_trace_iclog(xlog_in_core_t *iclog, uint state) 160xlog_trace_iclog(xlog_in_core_t *iclog, uint state)
161{ 161{
162 if (!iclog->ic_trace) 162 if (!iclog->ic_trace)
163 iclog->ic_trace = ktrace_alloc(256, KM_SLEEP); 163 iclog->ic_trace = ktrace_alloc(256, KM_NOFS);
164 ktrace_enter(iclog->ic_trace, 164 ktrace_enter(iclog->ic_trace,
165 (void *)((unsigned long)state), 165 (void *)((unsigned long)state),
166 (void *)((unsigned long)current_pid()), 166 (void *)((unsigned long)current_pid()),
@@ -336,15 +336,12 @@ xfs_log_done(xfs_mount_t *mp,
336 } else { 336 } else {
337 xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)"); 337 xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)");
338 xlog_regrant_reserve_log_space(log, ticket); 338 xlog_regrant_reserve_log_space(log, ticket);
339 } 339 /* If this ticket was a permanent reservation and we aren't
340 340 * trying to release it, reset the inited flags; so next time
341 /* If this ticket was a permanent reservation and we aren't 341 * we write, a start record will be written out.
342 * trying to release it, reset the inited flags; so next time 342 */
343 * we write, a start record will be written out.
344 */
345 if ((ticket->t_flags & XLOG_TIC_PERM_RESERV) &&
346 (flags & XFS_LOG_REL_PERM_RESERV) == 0)
347 ticket->t_flags |= XLOG_TIC_INITED; 343 ticket->t_flags |= XLOG_TIC_INITED;
344 }
348 345
349 return lsn; 346 return lsn;
350} /* xfs_log_done */ 347} /* xfs_log_done */
@@ -357,11 +354,11 @@ xfs_log_done(xfs_mount_t *mp,
357 * Asynchronous forces are implemented by setting the WANT_SYNC 354 * Asynchronous forces are implemented by setting the WANT_SYNC
358 * bit in the appropriate in-core log and then returning. 355 * bit in the appropriate in-core log and then returning.
359 * 356 *
360 * Synchronous forces are implemented with a semaphore. All callers 357 * Synchronous forces are implemented with a signal variable. All callers
361 * to force a given lsn to disk will wait on a semaphore attached to the 358 * to force a given lsn to disk will wait on the sv attached to the
362 * specific in-core log. When given in-core log finally completes its 359 * specific in-core log. When given in-core log finally completes its
363 * write to disk, that thread will wake up all threads waiting on the 360 * write to disk, that thread will wake up all threads waiting on the
364 * semaphore. 361 * sv.
365 */ 362 */
366int 363int
367_xfs_log_force( 364_xfs_log_force(
@@ -588,12 +585,12 @@ error:
588 * mp - ubiquitous xfs mount point structure 585 * mp - ubiquitous xfs mount point structure
589 */ 586 */
590int 587int
591xfs_log_mount_finish(xfs_mount_t *mp, int mfsi_flags) 588xfs_log_mount_finish(xfs_mount_t *mp)
592{ 589{
593 int error; 590 int error;
594 591
595 if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) 592 if (!(mp->m_flags & XFS_MOUNT_NORECOVERY))
596 error = xlog_recover_finish(mp->m_log, mfsi_flags); 593 error = xlog_recover_finish(mp->m_log);
597 else { 594 else {
598 error = 0; 595 error = 0;
599 ASSERT(mp->m_flags & XFS_MOUNT_RDONLY); 596 ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
@@ -707,7 +704,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
707 if (!(iclog->ic_state == XLOG_STATE_ACTIVE || 704 if (!(iclog->ic_state == XLOG_STATE_ACTIVE ||
708 iclog->ic_state == XLOG_STATE_DIRTY)) { 705 iclog->ic_state == XLOG_STATE_DIRTY)) {
709 if (!XLOG_FORCED_SHUTDOWN(log)) { 706 if (!XLOG_FORCED_SHUTDOWN(log)) {
710 sv_wait(&iclog->ic_forcesema, PMEM, 707 sv_wait(&iclog->ic_force_wait, PMEM,
711 &log->l_icloglock, s); 708 &log->l_icloglock, s);
712 } else { 709 } else {
713 spin_unlock(&log->l_icloglock); 710 spin_unlock(&log->l_icloglock);
@@ -748,7 +745,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
748 || iclog->ic_state == XLOG_STATE_DIRTY 745 || iclog->ic_state == XLOG_STATE_DIRTY
749 || iclog->ic_state == XLOG_STATE_IOERROR) ) { 746 || iclog->ic_state == XLOG_STATE_IOERROR) ) {
750 747
751 sv_wait(&iclog->ic_forcesema, PMEM, 748 sv_wait(&iclog->ic_force_wait, PMEM,
752 &log->l_icloglock, s); 749 &log->l_icloglock, s);
753 } else { 750 } else {
754 spin_unlock(&log->l_icloglock); 751 spin_unlock(&log->l_icloglock);
@@ -838,7 +835,7 @@ xfs_log_move_tail(xfs_mount_t *mp,
838 break; 835 break;
839 tail_lsn = 0; 836 tail_lsn = 0;
840 free_bytes -= tic->t_unit_res; 837 free_bytes -= tic->t_unit_res;
841 sv_signal(&tic->t_sema); 838 sv_signal(&tic->t_wait);
842 tic = tic->t_next; 839 tic = tic->t_next;
843 } while (tic != log->l_write_headq); 840 } while (tic != log->l_write_headq);
844 } 841 }
@@ -859,7 +856,7 @@ xfs_log_move_tail(xfs_mount_t *mp,
859 break; 856 break;
860 tail_lsn = 0; 857 tail_lsn = 0;
861 free_bytes -= need_bytes; 858 free_bytes -= need_bytes;
862 sv_signal(&tic->t_sema); 859 sv_signal(&tic->t_wait);
863 tic = tic->t_next; 860 tic = tic->t_next;
864 } while (tic != log->l_reserve_headq); 861 } while (tic != log->l_reserve_headq);
865 } 862 }
@@ -1285,8 +1282,8 @@ xlog_alloc_log(xfs_mount_t *mp,
1285 1282
1286 ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp)); 1283 ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp));
1287 ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0); 1284 ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0);
1288 sv_init(&iclog->ic_forcesema, SV_DEFAULT, "iclog-force"); 1285 sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force");
1289 sv_init(&iclog->ic_writesema, SV_DEFAULT, "iclog-write"); 1286 sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write");
1290 1287
1291 iclogp = &iclog->ic_next; 1288 iclogp = &iclog->ic_next;
1292 } 1289 }
@@ -1565,8 +1562,8 @@ xlog_dealloc_log(xlog_t *log)
1565 1562
1566 iclog = log->l_iclog; 1563 iclog = log->l_iclog;
1567 for (i=0; i<log->l_iclog_bufs; i++) { 1564 for (i=0; i<log->l_iclog_bufs; i++) {
1568 sv_destroy(&iclog->ic_forcesema); 1565 sv_destroy(&iclog->ic_force_wait);
1569 sv_destroy(&iclog->ic_writesema); 1566 sv_destroy(&iclog->ic_write_wait);
1570 xfs_buf_free(iclog->ic_bp); 1567 xfs_buf_free(iclog->ic_bp);
1571#ifdef XFS_LOG_TRACE 1568#ifdef XFS_LOG_TRACE
1572 if (iclog->ic_trace != NULL) { 1569 if (iclog->ic_trace != NULL) {
@@ -1976,7 +1973,7 @@ xlog_write(xfs_mount_t * mp,
1976/* Clean iclogs starting from the head. This ordering must be 1973/* Clean iclogs starting from the head. This ordering must be
1977 * maintained, so an iclog doesn't become ACTIVE beyond one that 1974 * maintained, so an iclog doesn't become ACTIVE beyond one that
1978 * is SYNCING. This is also required to maintain the notion that we use 1975 * is SYNCING. This is also required to maintain the notion that we use
1979 * a counting semaphore to hold off would be writers to the log when every 1976 * an ordered wait queue to hold off would be writers to the log when every
1980 * iclog is trying to sync to disk. 1977 * iclog is trying to sync to disk.
1981 * 1978 *
1982 * State Change: DIRTY -> ACTIVE 1979 * State Change: DIRTY -> ACTIVE
@@ -2240,7 +2237,7 @@ xlog_state_do_callback(
2240 xlog_state_clean_log(log); 2237 xlog_state_clean_log(log);
2241 2238
2242 /* wake up threads waiting in xfs_log_force() */ 2239 /* wake up threads waiting in xfs_log_force() */
2243 sv_broadcast(&iclog->ic_forcesema); 2240 sv_broadcast(&iclog->ic_force_wait);
2244 2241
2245 iclog = iclog->ic_next; 2242 iclog = iclog->ic_next;
2246 } while (first_iclog != iclog); 2243 } while (first_iclog != iclog);
@@ -2302,8 +2299,7 @@ xlog_state_do_callback(
2302 * the second completion goes through. 2299 * the second completion goes through.
2303 * 2300 *
2304 * Callbacks could take time, so they are done outside the scope of the 2301 * Callbacks could take time, so they are done outside the scope of the
2305 * global state machine log lock. Assume that the calls to cvsema won't 2302 * global state machine log lock.
2306 * take a long time. At least we know it won't sleep.
2307 */ 2303 */
2308STATIC void 2304STATIC void
2309xlog_state_done_syncing( 2305xlog_state_done_syncing(
@@ -2339,7 +2335,7 @@ xlog_state_done_syncing(
2339 * iclog buffer, we wake them all, one will get to do the 2335 * iclog buffer, we wake them all, one will get to do the
2340 * I/O, the others get to wait for the result. 2336 * I/O, the others get to wait for the result.
2341 */ 2337 */
2342 sv_broadcast(&iclog->ic_writesema); 2338 sv_broadcast(&iclog->ic_write_wait);
2343 spin_unlock(&log->l_icloglock); 2339 spin_unlock(&log->l_icloglock);
2344 xlog_state_do_callback(log, aborted, iclog); /* also cleans log */ 2340 xlog_state_do_callback(log, aborted, iclog); /* also cleans log */
2345} /* xlog_state_done_syncing */ 2341} /* xlog_state_done_syncing */
@@ -2347,11 +2343,9 @@ xlog_state_done_syncing(
2347 2343
2348/* 2344/*
2349 * If the head of the in-core log ring is not (ACTIVE or DIRTY), then we must 2345 * If the head of the in-core log ring is not (ACTIVE or DIRTY), then we must
2350 * sleep. The flush semaphore is set to the number of in-core buffers and 2346 * sleep. We wait on the flush queue on the head iclog as that should be
2351 * decremented around disk syncing. Therefore, if all buffers are syncing, 2347 * the first iclog to complete flushing. Hence if all iclogs are syncing,
2352 * this semaphore will cause new writes to sleep until a sync completes. 2348 * we will wait here and all new writes will sleep until a sync completes.
2353 * Otherwise, this code just does p() followed by v(). This approximates
2354 * a sleep/wakeup except we can't race.
2355 * 2349 *
2356 * The in-core logs are used in a circular fashion. They are not used 2350 * The in-core logs are used in a circular fashion. They are not used
2357 * out-of-order even when an iclog past the head is free. 2351 * out-of-order even when an iclog past the head is free.
@@ -2508,7 +2502,7 @@ xlog_grant_log_space(xlog_t *log,
2508 goto error_return; 2502 goto error_return;
2509 2503
2510 XFS_STATS_INC(xs_sleep_logspace); 2504 XFS_STATS_INC(xs_sleep_logspace);
2511 sv_wait(&tic->t_sema, PINOD|PLTWAIT, &log->l_grant_lock, s); 2505 sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
2512 /* 2506 /*
2513 * If we got an error, and the filesystem is shutting down, 2507 * If we got an error, and the filesystem is shutting down,
2514 * we'll catch it down below. So just continue... 2508 * we'll catch it down below. So just continue...
@@ -2534,7 +2528,7 @@ redo:
2534 xlog_trace_loggrant(log, tic, 2528 xlog_trace_loggrant(log, tic,
2535 "xlog_grant_log_space: sleep 2"); 2529 "xlog_grant_log_space: sleep 2");
2536 XFS_STATS_INC(xs_sleep_logspace); 2530 XFS_STATS_INC(xs_sleep_logspace);
2537 sv_wait(&tic->t_sema, PINOD|PLTWAIT, &log->l_grant_lock, s); 2531 sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
2538 2532
2539 if (XLOG_FORCED_SHUTDOWN(log)) { 2533 if (XLOG_FORCED_SHUTDOWN(log)) {
2540 spin_lock(&log->l_grant_lock); 2534 spin_lock(&log->l_grant_lock);
@@ -2633,7 +2627,7 @@ xlog_regrant_write_log_space(xlog_t *log,
2633 if (free_bytes < ntic->t_unit_res) 2627 if (free_bytes < ntic->t_unit_res)
2634 break; 2628 break;
2635 free_bytes -= ntic->t_unit_res; 2629 free_bytes -= ntic->t_unit_res;
2636 sv_signal(&ntic->t_sema); 2630 sv_signal(&ntic->t_wait);
2637 ntic = ntic->t_next; 2631 ntic = ntic->t_next;
2638 } while (ntic != log->l_write_headq); 2632 } while (ntic != log->l_write_headq);
2639 2633
@@ -2644,7 +2638,7 @@ xlog_regrant_write_log_space(xlog_t *log,
2644 xlog_trace_loggrant(log, tic, 2638 xlog_trace_loggrant(log, tic,
2645 "xlog_regrant_write_log_space: sleep 1"); 2639 "xlog_regrant_write_log_space: sleep 1");
2646 XFS_STATS_INC(xs_sleep_logspace); 2640 XFS_STATS_INC(xs_sleep_logspace);
2647 sv_wait(&tic->t_sema, PINOD|PLTWAIT, 2641 sv_wait(&tic->t_wait, PINOD|PLTWAIT,
2648 &log->l_grant_lock, s); 2642 &log->l_grant_lock, s);
2649 2643
2650 /* If we're shutting down, this tic is already 2644 /* If we're shutting down, this tic is already
@@ -2673,7 +2667,7 @@ redo:
2673 if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) 2667 if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
2674 xlog_ins_ticketq(&log->l_write_headq, tic); 2668 xlog_ins_ticketq(&log->l_write_headq, tic);
2675 XFS_STATS_INC(xs_sleep_logspace); 2669 XFS_STATS_INC(xs_sleep_logspace);
2676 sv_wait(&tic->t_sema, PINOD|PLTWAIT, &log->l_grant_lock, s); 2670 sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
2677 2671
2678 /* If we're shutting down, this tic is already off the queue */ 2672 /* If we're shutting down, this tic is already off the queue */
2679 if (XLOG_FORCED_SHUTDOWN(log)) { 2673 if (XLOG_FORCED_SHUTDOWN(log)) {
@@ -2916,7 +2910,7 @@ xlog_state_switch_iclogs(xlog_t *log,
2916 * 2. the current iclog is dirty, and the previous iclog is in the 2910 * 2. the current iclog is dirty, and the previous iclog is in the
2917 * active or dirty state. 2911 * active or dirty state.
2918 * 2912 *
2919 * We may sleep (call psema) if: 2913 * We may sleep if:
2920 * 2914 *
2921 * 1. the current iclog is not in the active nor dirty state. 2915 * 1. the current iclog is not in the active nor dirty state.
2922 * 2. the current iclog dirty, and the previous iclog is not in the 2916 * 2. the current iclog dirty, and the previous iclog is not in the
@@ -3013,7 +3007,7 @@ maybe_sleep:
3013 return XFS_ERROR(EIO); 3007 return XFS_ERROR(EIO);
3014 } 3008 }
3015 XFS_STATS_INC(xs_log_force_sleep); 3009 XFS_STATS_INC(xs_log_force_sleep);
3016 sv_wait(&iclog->ic_forcesema, PINOD, &log->l_icloglock, s); 3010 sv_wait(&iclog->ic_force_wait, PINOD, &log->l_icloglock, s);
3017 /* 3011 /*
3018 * No need to grab the log lock here since we're 3012 * No need to grab the log lock here since we're
3019 * only deciding whether or not to return EIO 3013 * only deciding whether or not to return EIO
@@ -3096,7 +3090,7 @@ try_again:
3096 XLOG_STATE_SYNCING))) { 3090 XLOG_STATE_SYNCING))) {
3097 ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR)); 3091 ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR));
3098 XFS_STATS_INC(xs_log_force_sleep); 3092 XFS_STATS_INC(xs_log_force_sleep);
3099 sv_wait(&iclog->ic_prev->ic_writesema, PSWP, 3093 sv_wait(&iclog->ic_prev->ic_write_wait, PSWP,
3100 &log->l_icloglock, s); 3094 &log->l_icloglock, s);
3101 *log_flushed = 1; 3095 *log_flushed = 1;
3102 already_slept = 1; 3096 already_slept = 1;
@@ -3116,7 +3110,7 @@ try_again:
3116 !(iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) { 3110 !(iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) {
3117 3111
3118 /* 3112 /*
3119 * Don't wait on the forcesema if we know that we've 3113 * Don't wait on completion if we know that we've
3120 * gotten a log write error. 3114 * gotten a log write error.
3121 */ 3115 */
3122 if (iclog->ic_state & XLOG_STATE_IOERROR) { 3116 if (iclog->ic_state & XLOG_STATE_IOERROR) {
@@ -3124,7 +3118,7 @@ try_again:
3124 return XFS_ERROR(EIO); 3118 return XFS_ERROR(EIO);
3125 } 3119 }
3126 XFS_STATS_INC(xs_log_force_sleep); 3120 XFS_STATS_INC(xs_log_force_sleep);
3127 sv_wait(&iclog->ic_forcesema, PSWP, &log->l_icloglock, s); 3121 sv_wait(&iclog->ic_force_wait, PSWP, &log->l_icloglock, s);
3128 /* 3122 /*
3129 * No need to grab the log lock here since we're 3123 * No need to grab the log lock here since we're
3130 * only deciding whether or not to return EIO 3124 * only deciding whether or not to return EIO
@@ -3180,7 +3174,7 @@ STATIC void
3180xlog_ticket_put(xlog_t *log, 3174xlog_ticket_put(xlog_t *log,
3181 xlog_ticket_t *ticket) 3175 xlog_ticket_t *ticket)
3182{ 3176{
3183 sv_destroy(&ticket->t_sema); 3177 sv_destroy(&ticket->t_wait);
3184 kmem_zone_free(xfs_log_ticket_zone, ticket); 3178 kmem_zone_free(xfs_log_ticket_zone, ticket);
3185} /* xlog_ticket_put */ 3179} /* xlog_ticket_put */
3186 3180
@@ -3270,7 +3264,7 @@ xlog_ticket_get(xlog_t *log,
3270 tic->t_trans_type = 0; 3264 tic->t_trans_type = 0;
3271 if (xflags & XFS_LOG_PERM_RESERV) 3265 if (xflags & XFS_LOG_PERM_RESERV)
3272 tic->t_flags |= XLOG_TIC_PERM_RESERV; 3266 tic->t_flags |= XLOG_TIC_PERM_RESERV;
3273 sv_init(&(tic->t_sema), SV_DEFAULT, "logtick"); 3267 sv_init(&(tic->t_wait), SV_DEFAULT, "logtick");
3274 3268
3275 xlog_tic_reset_res(tic); 3269 xlog_tic_reset_res(tic);
3276 3270
@@ -3557,14 +3551,14 @@ xfs_log_force_umount(
3557 */ 3551 */
3558 if ((tic = log->l_reserve_headq)) { 3552 if ((tic = log->l_reserve_headq)) {
3559 do { 3553 do {
3560 sv_signal(&tic->t_sema); 3554 sv_signal(&tic->t_wait);
3561 tic = tic->t_next; 3555 tic = tic->t_next;
3562 } while (tic != log->l_reserve_headq); 3556 } while (tic != log->l_reserve_headq);
3563 } 3557 }
3564 3558
3565 if ((tic = log->l_write_headq)) { 3559 if ((tic = log->l_write_headq)) {
3566 do { 3560 do {
3567 sv_signal(&tic->t_sema); 3561 sv_signal(&tic->t_wait);
3568 tic = tic->t_next; 3562 tic = tic->t_next;
3569 } while (tic != log->l_write_headq); 3563 } while (tic != log->l_write_headq);
3570 } 3564 }