aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_log.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_log.c')
-rw-r--r--fs/xfs/xfs_log.c312
1 files changed, 153 insertions, 159 deletions
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 20118ddadef6..4f16be4b6ee5 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -79,11 +79,6 @@ STATIC int xlog_state_release_iclog(xlog_t *log,
79STATIC void xlog_state_switch_iclogs(xlog_t *log, 79STATIC void xlog_state_switch_iclogs(xlog_t *log,
80 xlog_in_core_t *iclog, 80 xlog_in_core_t *iclog,
81 int eventual_size); 81 int eventual_size);
82STATIC int xlog_state_sync(xlog_t *log,
83 xfs_lsn_t lsn,
84 uint flags,
85 int *log_flushed);
86STATIC int xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed);
87STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog); 82STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog);
88 83
89/* local functions to manipulate grant head */ 84/* local functions to manipulate grant head */
@@ -296,65 +291,6 @@ xfs_log_done(xfs_mount_t *mp,
296 return lsn; 291 return lsn;
297} /* xfs_log_done */ 292} /* xfs_log_done */
298 293
299
300/*
301 * Force the in-core log to disk. If flags == XFS_LOG_SYNC,
302 * the force is done synchronously.
303 *
304 * Asynchronous forces are implemented by setting the WANT_SYNC
305 * bit in the appropriate in-core log and then returning.
306 *
307 * Synchronous forces are implemented with a signal variable. All callers
308 * to force a given lsn to disk will wait on the sv attached to the
309 * specific in-core log. When the given in-core log finally completes its
310 * write to disk, that thread will wake up all threads waiting on the
311 * sv.
312 */
313int
314_xfs_log_force(
315 xfs_mount_t *mp,
316 xfs_lsn_t lsn,
317 uint flags,
318 int *log_flushed)
319{
320 xlog_t *log = mp->m_log;
321 int dummy;
322
323 if (!log_flushed)
324 log_flushed = &dummy;
325
326 ASSERT(flags & XFS_LOG_FORCE);
327
328 XFS_STATS_INC(xs_log_force);
329
330 if (log->l_flags & XLOG_IO_ERROR)
331 return XFS_ERROR(EIO);
332 if (lsn == 0)
333 return xlog_state_sync_all(log, flags, log_flushed);
334 else
335 return xlog_state_sync(log, lsn, flags, log_flushed);
336} /* _xfs_log_force */
337
338/*
339 * Wrapper for _xfs_log_force(), to be used when caller doesn't care
340 * about errors or whether the log was flushed or not. This is the normal
341 * interface to use when trying to unpin items or move the log forward.
342 */
343void
344xfs_log_force(
345 xfs_mount_t *mp,
346 xfs_lsn_t lsn,
347 uint flags)
348{
349 int error;
350 error = _xfs_log_force(mp, lsn, flags, NULL);
351 if (error) {
352 xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: "
353 "error %d returned.", error);
354 }
355}
356
357
358/* 294/*
359 * Attaches a new iclog I/O completion callback routine during 295 * Attaches a new iclog I/O completion callback routine during
360 * transaction commit. If the log is in error state, a non-zero 296 * transaction commit. If the log is in error state, a non-zero
@@ -601,7 +537,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
601 if (mp->m_flags & XFS_MOUNT_RDONLY) 537 if (mp->m_flags & XFS_MOUNT_RDONLY)
602 return 0; 538 return 0;
603 539
604 error = _xfs_log_force(mp, 0, XFS_LOG_FORCE|XFS_LOG_SYNC, NULL); 540 error = _xfs_log_force(mp, XFS_LOG_SYNC, NULL);
605 ASSERT(error || !(XLOG_FORCED_SHUTDOWN(log))); 541 ASSERT(error || !(XLOG_FORCED_SHUTDOWN(log)));
606 542
607#ifdef DEBUG 543#ifdef DEBUG
@@ -2853,7 +2789,6 @@ xlog_state_switch_iclogs(xlog_t *log,
2853 log->l_iclog = iclog->ic_next; 2789 log->l_iclog = iclog->ic_next;
2854} /* xlog_state_switch_iclogs */ 2790} /* xlog_state_switch_iclogs */
2855 2791
2856
2857/* 2792/*
2858 * Write out all data in the in-core log as of this exact moment in time. 2793 * Write out all data in the in-core log as of this exact moment in time.
2859 * 2794 *
@@ -2881,11 +2816,17 @@ xlog_state_switch_iclogs(xlog_t *log,
2881 * b) when we return from flushing out this iclog, it is still 2816 * b) when we return from flushing out this iclog, it is still
2882 * not in the active nor dirty state. 2817 * not in the active nor dirty state.
2883 */ 2818 */
2884STATIC int 2819int
2885xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed) 2820_xfs_log_force(
2821 struct xfs_mount *mp,
2822 uint flags,
2823 int *log_flushed)
2886{ 2824{
2887 xlog_in_core_t *iclog; 2825 struct log *log = mp->m_log;
2888 xfs_lsn_t lsn; 2826 struct xlog_in_core *iclog;
2827 xfs_lsn_t lsn;
2828
2829 XFS_STATS_INC(xs_log_force);
2889 2830
2890 spin_lock(&log->l_icloglock); 2831 spin_lock(&log->l_icloglock);
2891 2832
@@ -2931,7 +2872,9 @@ xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed)
2931 2872
2932 if (xlog_state_release_iclog(log, iclog)) 2873 if (xlog_state_release_iclog(log, iclog))
2933 return XFS_ERROR(EIO); 2874 return XFS_ERROR(EIO);
2934 *log_flushed = 1; 2875
2876 if (log_flushed)
2877 *log_flushed = 1;
2935 spin_lock(&log->l_icloglock); 2878 spin_lock(&log->l_icloglock);
2936 if (be64_to_cpu(iclog->ic_header.h_lsn) == lsn && 2879 if (be64_to_cpu(iclog->ic_header.h_lsn) == lsn &&
2937 iclog->ic_state != XLOG_STATE_DIRTY) 2880 iclog->ic_state != XLOG_STATE_DIRTY)
@@ -2975,19 +2918,37 @@ maybe_sleep:
2975 */ 2918 */
2976 if (iclog->ic_state & XLOG_STATE_IOERROR) 2919 if (iclog->ic_state & XLOG_STATE_IOERROR)
2977 return XFS_ERROR(EIO); 2920 return XFS_ERROR(EIO);
2978 *log_flushed = 1; 2921 if (log_flushed)
2979 2922 *log_flushed = 1;
2980 } else { 2923 } else {
2981 2924
2982no_sleep: 2925no_sleep:
2983 spin_unlock(&log->l_icloglock); 2926 spin_unlock(&log->l_icloglock);
2984 } 2927 }
2985 return 0; 2928 return 0;
2986} /* xlog_state_sync_all */ 2929}
2987 2930
2931/*
2932 * Wrapper for _xfs_log_force(), to be used when caller doesn't care
2933 * about errors or whether the log was flushed or not. This is the normal
2934 * interface to use when trying to unpin items or move the log forward.
2935 */
2936void
2937xfs_log_force(
2938 xfs_mount_t *mp,
2939 uint flags)
2940{
2941 int error;
2942
2943 error = _xfs_log_force(mp, flags, NULL);
2944 if (error) {
2945 xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: "
2946 "error %d returned.", error);
2947 }
2948}
2988 2949
2989/* 2950/*
2990 * Used by code which implements synchronous log forces. 2951 * Force the in-core log to disk for a specific LSN.
2991 * 2952 *
2992 * Find in-core log with lsn. 2953 * Find in-core log with lsn.
2993 * If it is in the DIRTY state, just return. 2954 * If it is in the DIRTY state, just return.
@@ -2995,109 +2956,142 @@ no_sleep:
2995 * state and go to sleep or return. 2956 * state and go to sleep or return.
2996 * If it is in any other state, go to sleep or return. 2957 * If it is in any other state, go to sleep or return.
2997 * 2958 *
2998 * If filesystem activity goes to zero, the iclog will get flushed only by 2959 * Synchronous forces are implemented with a signal variable. All callers
2999 * bdflush(). 2960 * to force a given lsn to disk will wait on the sv attached to the
2961 * specific in-core log. When the given in-core log finally completes its
2962 * write to disk, that thread will wake up all threads waiting on the
2963 * sv.
3000 */ 2964 */
3001STATIC int 2965int
3002xlog_state_sync(xlog_t *log, 2966_xfs_log_force_lsn(
3003 xfs_lsn_t lsn, 2967 struct xfs_mount *mp,
3004 uint flags, 2968 xfs_lsn_t lsn,
3005 int *log_flushed) 2969 uint flags,
2970 int *log_flushed)
3006{ 2971{
3007 xlog_in_core_t *iclog; 2972 struct log *log = mp->m_log;
3008 int already_slept = 0; 2973 struct xlog_in_core *iclog;
3009 2974 int already_slept = 0;
3010try_again:
3011 spin_lock(&log->l_icloglock);
3012 iclog = log->l_iclog;
3013 2975
3014 if (iclog->ic_state & XLOG_STATE_IOERROR) { 2976 ASSERT(lsn != 0);
3015 spin_unlock(&log->l_icloglock);
3016 return XFS_ERROR(EIO);
3017 }
3018 2977
3019 do { 2978 XFS_STATS_INC(xs_log_force);
3020 if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) {
3021 iclog = iclog->ic_next;
3022 continue;
3023 }
3024 2979
3025 if (iclog->ic_state == XLOG_STATE_DIRTY) { 2980try_again:
2981 spin_lock(&log->l_icloglock);
2982 iclog = log->l_iclog;
2983 if (iclog->ic_state & XLOG_STATE_IOERROR) {
3026 spin_unlock(&log->l_icloglock); 2984 spin_unlock(&log->l_icloglock);
3027 return 0; 2985 return XFS_ERROR(EIO);
3028 } 2986 }
3029 2987
3030 if (iclog->ic_state == XLOG_STATE_ACTIVE) { 2988 do {
3031 /* 2989 if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) {
3032 * We sleep here if we haven't already slept (e.g. 2990 iclog = iclog->ic_next;
3033 * this is the first time we've looked at the correct 2991 continue;
3034 * iclog buf) and the buffer before us is going to 2992 }
3035 * be sync'ed. The reason for this is that if we 2993
3036 * are doing sync transactions here, by waiting for 2994 if (iclog->ic_state == XLOG_STATE_DIRTY) {
3037 * the previous I/O to complete, we can allow a few 2995 spin_unlock(&log->l_icloglock);
3038 * more transactions into this iclog before we close 2996 return 0;
3039 * it down. 2997 }
3040 * 2998
3041 * Otherwise, we mark the buffer WANT_SYNC, and bump 2999 if (iclog->ic_state == XLOG_STATE_ACTIVE) {
3042 * up the refcnt so we can release the log (which drops 3000 /*
3043 * the ref count). The state switch keeps new transaction 3001 * We sleep here if we haven't already slept (e.g.
3044 * commits from using this buffer. When the current commits 3002 * this is the first time we've looked at the correct
3045 * finish writing into the buffer, the refcount will drop to 3003 * iclog buf) and the buffer before us is going to
3046 * zero and the buffer will go out then. 3004 * be sync'ed. The reason for this is that if we
3047 */ 3005 * are doing sync transactions here, by waiting for
3048 if (!already_slept && 3006 * the previous I/O to complete, we can allow a few
3049 (iclog->ic_prev->ic_state & (XLOG_STATE_WANT_SYNC | 3007 * more transactions into this iclog before we close
3050 XLOG_STATE_SYNCING))) { 3008 * it down.
3051 ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR)); 3009 *
3052 XFS_STATS_INC(xs_log_force_sleep); 3010 * Otherwise, we mark the buffer WANT_SYNC, and bump
3053 sv_wait(&iclog->ic_prev->ic_write_wait, PSWP, 3011 * up the refcnt so we can release the log (which
3054 &log->l_icloglock, s); 3012 * drops the ref count). The state switch keeps new
3055 *log_flushed = 1; 3013 * transaction commits from using this buffer. When
3056 already_slept = 1; 3014 * the current commits finish writing into the buffer,
3057 goto try_again; 3015 * the refcount will drop to zero and the buffer will
3058 } else { 3016 * go out then.
3017 */
3018 if (!already_slept &&
3019 (iclog->ic_prev->ic_state &
3020 (XLOG_STATE_WANT_SYNC | XLOG_STATE_SYNCING))) {
3021 ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR));
3022
3023 XFS_STATS_INC(xs_log_force_sleep);
3024
3025 sv_wait(&iclog->ic_prev->ic_write_wait,
3026 PSWP, &log->l_icloglock, s);
3027 if (log_flushed)
3028 *log_flushed = 1;
3029 already_slept = 1;
3030 goto try_again;
3031 }
3059 atomic_inc(&iclog->ic_refcnt); 3032 atomic_inc(&iclog->ic_refcnt);
3060 xlog_state_switch_iclogs(log, iclog, 0); 3033 xlog_state_switch_iclogs(log, iclog, 0);
3061 spin_unlock(&log->l_icloglock); 3034 spin_unlock(&log->l_icloglock);
3062 if (xlog_state_release_iclog(log, iclog)) 3035 if (xlog_state_release_iclog(log, iclog))
3063 return XFS_ERROR(EIO); 3036 return XFS_ERROR(EIO);
3064 *log_flushed = 1; 3037 if (log_flushed)
3038 *log_flushed = 1;
3065 spin_lock(&log->l_icloglock); 3039 spin_lock(&log->l_icloglock);
3066 } 3040 }
3067 }
3068 3041
3069 if ((flags & XFS_LOG_SYNC) && /* sleep */ 3042 if ((flags & XFS_LOG_SYNC) && /* sleep */
3070 !(iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) { 3043 !(iclog->ic_state &
3044 (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) {
3045 /*
3046 * Don't wait on completion if we know that we've
3047 * gotten a log write error.
3048 */
3049 if (iclog->ic_state & XLOG_STATE_IOERROR) {
3050 spin_unlock(&log->l_icloglock);
3051 return XFS_ERROR(EIO);
3052 }
3053 XFS_STATS_INC(xs_log_force_sleep);
3054 sv_wait(&iclog->ic_force_wait, PSWP, &log->l_icloglock, s);
3055 /*
3056 * No need to grab the log lock here since we're
3057 * only deciding whether or not to return EIO
3058 * and the memory read should be atomic.
3059 */
3060 if (iclog->ic_state & XLOG_STATE_IOERROR)
3061 return XFS_ERROR(EIO);
3071 3062
3072 /* 3063 if (log_flushed)
3073 * Don't wait on completion if we know that we've 3064 *log_flushed = 1;
3074 * gotten a log write error. 3065 } else { /* just return */
3075 */
3076 if (iclog->ic_state & XLOG_STATE_IOERROR) {
3077 spin_unlock(&log->l_icloglock); 3066 spin_unlock(&log->l_icloglock);
3078 return XFS_ERROR(EIO);
3079 } 3067 }
3080 XFS_STATS_INC(xs_log_force_sleep);
3081 sv_wait(&iclog->ic_force_wait, PSWP, &log->l_icloglock, s);
3082 /*
3083 * No need to grab the log lock here since we're
3084 * only deciding whether or not to return EIO
3085 * and the memory read should be atomic.
3086 */
3087 if (iclog->ic_state & XLOG_STATE_IOERROR)
3088 return XFS_ERROR(EIO);
3089 *log_flushed = 1;
3090 } else { /* just return */
3091 spin_unlock(&log->l_icloglock);
3092 }
3093 return 0;
3094 3068
3095 } while (iclog != log->l_iclog); 3069 return 0;
3070 } while (iclog != log->l_iclog);
3096 3071
3097 spin_unlock(&log->l_icloglock); 3072 spin_unlock(&log->l_icloglock);
3098 return 0; 3073 return 0;
3099} /* xlog_state_sync */ 3074}
3075
3076/*
3077 * Wrapper for _xfs_log_force_lsn(), to be used when caller doesn't care
3078 * about errors or whether the log was flushed or not. This is the normal
3079 * interface to use when trying to unpin items or move the log forward.
3080 */
3081void
3082xfs_log_force_lsn(
3083 xfs_mount_t *mp,
3084 xfs_lsn_t lsn,
3085 uint flags)
3086{
3087 int error;
3100 3088
3089 error = _xfs_log_force_lsn(mp, lsn, flags, NULL);
3090 if (error) {
3091 xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: "
3092 "error %d returned.", error);
3093 }
3094}
3101 3095
3102/* 3096/*
3103 * Called when we want to mark the current iclog as being ready to sync to 3097 * Called when we want to mark the current iclog as being ready to sync to
@@ -3462,7 +3456,6 @@ xfs_log_force_umount(
3462 xlog_ticket_t *tic; 3456 xlog_ticket_t *tic;
3463 xlog_t *log; 3457 xlog_t *log;
3464 int retval; 3458 int retval;
3465 int dummy;
3466 3459
3467 log = mp->m_log; 3460 log = mp->m_log;
3468 3461
@@ -3536,13 +3529,14 @@ xfs_log_force_umount(
3536 } 3529 }
3537 spin_unlock(&log->l_grant_lock); 3530 spin_unlock(&log->l_grant_lock);
3538 3531
3539 if (! (log->l_iclog->ic_state & XLOG_STATE_IOERROR)) { 3532 if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) {
3540 ASSERT(!logerror); 3533 ASSERT(!logerror);
3541 /* 3534 /*
3542 * Force the incore logs to disk before shutting the 3535 * Force the incore logs to disk before shutting the
3543 * log down completely. 3536 * log down completely.
3544 */ 3537 */
3545 xlog_state_sync_all(log, XFS_LOG_FORCE|XFS_LOG_SYNC, &dummy); 3538 _xfs_log_force(mp, XFS_LOG_SYNC, NULL);
3539
3546 spin_lock(&log->l_icloglock); 3540 spin_lock(&log->l_icloglock);
3547 retval = xlog_state_ioerror(log); 3541 retval = xlog_state_ioerror(log);
3548 spin_unlock(&log->l_icloglock); 3542 spin_unlock(&log->l_icloglock);