Diffstat (limited to 'fs/xfs/xfs_log.c')
-rw-r--r--  fs/xfs/xfs_log.c  383
1 files changed, 188 insertions, 195 deletions
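
The diff below (index 600b5b06aaeb..4f16be4b6ee5) folds xlog_state_sync_all() and xlog_state_sync() into _xfs_log_force() and _xfs_log_force_lsn(), makes xlog_sync() call the renamed xlog_bdstrat() directly instead of going through XFS_bwrite(), and drops the lsn argument and the XFS_LOG_FORCE flag from the generic force path. As a hedged sketch of how a call site migrates (the call shapes are taken from the hunks below; example_force_to() is a hypothetical caller, not part of the patch):

/*
 * Sketch only, not part of the patch. "example_force_to" is made up;
 * _xfs_log_force/_xfs_log_force_lsn are the patched entry points.
 */
STATIC int
example_force_to(struct xfs_mount *mp, xfs_lsn_t lsn)
{
        int     log_flushed = 0;
        int     error;

        /*
         * Before: _xfs_log_force(mp, 0, XFS_LOG_FORCE|XFS_LOG_SYNC, NULL),
         * where lsn == 0 meant "force everything now in the iclogs".
         */
        error = _xfs_log_force(mp, XFS_LOG_SYNC, &log_flushed);
        if (error)
                return error;

        /*
         * Before: _xfs_log_force(mp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC, ...);
         * forcing up to a specific LSN now has its own entry point.
         */
        return _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed);
}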
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 600b5b06aaeb..4f16be4b6ee5 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -50,7 +50,6 @@ kmem_zone_t *xfs_log_ticket_zone;
         (off) += (bytes);}

 /* Local miscellaneous function prototypes */
-STATIC int      xlog_bdstrat_cb(struct xfs_buf *);
 STATIC int      xlog_commit_record(xfs_mount_t *mp, xlog_ticket_t *ticket,
                                    xlog_in_core_t **, xfs_lsn_t *);
 STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp,
@@ -80,11 +79,6 @@ STATIC int xlog_state_release_iclog(xlog_t *log,
 STATIC void xlog_state_switch_iclogs(xlog_t *log,
                                      xlog_in_core_t *iclog,
                                      int eventual_size);
-STATIC int  xlog_state_sync(xlog_t *log,
-                            xfs_lsn_t lsn,
-                            uint flags,
-                            int *log_flushed);
-STATIC int  xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed);
 STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog);

 /* local functions to manipulate grant head */
@@ -297,65 +291,6 @@ xfs_log_done(xfs_mount_t *mp,
         return lsn;
 }       /* xfs_log_done */

-
-/*
- * Force the in-core log to disk.  If flags == XFS_LOG_SYNC,
- *      the force is done synchronously.
- *
- * Asynchronous forces are implemented by setting the WANT_SYNC
- * bit in the appropriate in-core log and then returning.
- *
- * Synchronous forces are implemented with a signal variable. All callers
- * to force a given lsn to disk will wait on a the sv attached to the
- * specific in-core log.  When given in-core log finally completes its
- * write to disk, that thread will wake up all threads waiting on the
- * sv.
- */
-int
-_xfs_log_force(
-        xfs_mount_t     *mp,
-        xfs_lsn_t       lsn,
-        uint            flags,
-        int             *log_flushed)
-{
-        xlog_t          *log = mp->m_log;
-        int             dummy;
-
-        if (!log_flushed)
-                log_flushed = &dummy;
-
-        ASSERT(flags & XFS_LOG_FORCE);
-
-        XFS_STATS_INC(xs_log_force);
-
-        if (log->l_flags & XLOG_IO_ERROR)
-                return XFS_ERROR(EIO);
-        if (lsn == 0)
-                return xlog_state_sync_all(log, flags, log_flushed);
-        else
-                return xlog_state_sync(log, lsn, flags, log_flushed);
-}       /* _xfs_log_force */
-
-/*
- * Wrapper for _xfs_log_force(), to be used when caller doesn't care
- * about errors or whether the log was flushed or not. This is the normal
- * interface to use when trying to unpin items or move the log forward.
- */
-void
-xfs_log_force(
-        xfs_mount_t     *mp,
-        xfs_lsn_t       lsn,
-        uint            flags)
-{
-        int     error;
-        error = _xfs_log_force(mp, lsn, flags, NULL);
-        if (error) {
-                xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: "
-                        "error %d returned.", error);
-        }
-}
-
-
 /*
  * Attaches a new iclog I/O completion callback routine during
  * transaction commit.  If the log is in error state, a non-zero
@@ -602,7 +537,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
         if (mp->m_flags & XFS_MOUNT_RDONLY)
                 return 0;

-        error = _xfs_log_force(mp, 0, XFS_LOG_FORCE|XFS_LOG_SYNC, NULL);
+        error = _xfs_log_force(mp, XFS_LOG_SYNC, NULL);
         ASSERT(error || !(XLOG_FORCED_SHUTDOWN(log)));

 #ifdef DEBUG
@@ -618,7 +553,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
         if (! (XLOG_FORCED_SHUTDOWN(log))) {
                 reg[0].i_addr = (void*)&magic;
                 reg[0].i_len  = sizeof(magic);
-                XLOG_VEC_SET_TYPE(&reg[0], XLOG_REG_TYPE_UNMOUNT);
+                reg[0].i_type = XLOG_REG_TYPE_UNMOUNT;

                 error = xfs_log_reserve(mp, 600, 1, &tic,
                                         XFS_LOG, 0, XLOG_UNMOUNT_REC_TYPE);
@@ -988,35 +923,6 @@ xlog_iodone(xfs_buf_t *bp)
 }       /* xlog_iodone */

 /*
- * The bdstrat callback function for log bufs. This gives us a central
- * place to trap bufs in case we get hit by a log I/O error and need to
- * shutdown. Actually, in practice, even when we didn't get a log error,
- * we transition the iclogs to IOERROR state *after* flushing all existing
- * iclogs to disk. This is because we don't want anymore new transactions to be
- * started or completed afterwards.
- */
-STATIC int
-xlog_bdstrat_cb(struct xfs_buf *bp)
-{
-        xlog_in_core_t *iclog;
-
-        iclog = XFS_BUF_FSPRIVATE(bp, xlog_in_core_t *);
-
-        if ((iclog->ic_state & XLOG_STATE_IOERROR) == 0) {
-                /* note for irix bstrat will need  struct bdevsw passed
-                 * Fix the following macro if the code ever is merged
-                 */
-                XFS_bdstrat(bp);
-                return 0;
-        }
-
-        XFS_BUF_ERROR(bp, EIO);
-        XFS_BUF_STALE(bp);
-        xfs_biodone(bp);
-        return XFS_ERROR(EIO);
-}
-
-/*
  * Return size of each in-core log record buffer.
  *
  * All machines get 8 x 32kB buffers by default, unless tuned otherwise.
@@ -1158,7 +1064,6 @@ xlog_alloc_log(xfs_mount_t *mp,
         if (!bp)
                 goto out_free_log;
         XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
-        XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb);
         XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
         ASSERT(XFS_BUF_ISBUSY(bp));
         ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
@@ -1196,7 +1101,6 @@ xlog_alloc_log(xfs_mount_t *mp,
                 if (!XFS_BUF_CPSEMA(bp))
                         ASSERT(0);
                 XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
-                XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb);
                 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
                 iclog->ic_bp = bp;
                 iclog->ic_data = bp->b_addr;
@@ -1268,7 +1172,7 @@ xlog_commit_record(xfs_mount_t *mp,

         reg[0].i_addr = NULL;
         reg[0].i_len = 0;
-        XLOG_VEC_SET_TYPE(&reg[0], XLOG_REG_TYPE_COMMIT);
+        reg[0].i_type = XLOG_REG_TYPE_COMMIT;

         ASSERT_ALWAYS(iclog);
         if ((error = xlog_write(mp, reg, 1, ticket, commitlsnp,
@@ -1343,6 +1247,37 @@ xlog_grant_push_ail(xfs_mount_t *mp,
         xfs_trans_ail_push(log->l_ailp, threshold_lsn);
 }       /* xlog_grant_push_ail */

+/*
+ * The bdstrat callback function for log bufs. This gives us a central
+ * place to trap bufs in case we get hit by a log I/O error and need to
+ * shutdown. Actually, in practice, even when we didn't get a log error,
+ * we transition the iclogs to IOERROR state *after* flushing all existing
+ * iclogs to disk. This is because we don't want anymore new transactions to be
+ * started or completed afterwards.
+ */
+STATIC int
+xlog_bdstrat(
+        struct xfs_buf          *bp)
+{
+        struct xlog_in_core     *iclog;
+
+        iclog = XFS_BUF_FSPRIVATE(bp, xlog_in_core_t *);
+        if (iclog->ic_state & XLOG_STATE_IOERROR) {
+                XFS_BUF_ERROR(bp, EIO);
+                XFS_BUF_STALE(bp);
+                xfs_biodone(bp);
+                /*
+                 * It would seem logical to return EIO here, but we rely on
+                 * the log state machine to propagate I/O errors instead of
+                 * doing it here.
+                 */
+                return 0;
+        }
+
+        bp->b_flags |= _XBF_RUN_QUEUES;
+        xfs_buf_iorequest(bp);
+        return 0;
+}

 /*
  * Flush out the in-core log (iclog) to the on-disk log in an asynchronous
@@ -1462,7 +1397,7 @@ xlog_sync(xlog_t *log,
          */
         XFS_BUF_WRITE(bp);

-        if ((error = XFS_bwrite(bp))) {
+        if ((error = xlog_bdstrat(bp))) {
                 xfs_ioerror_alert("xlog_sync", log->l_mp, bp,
                                   XFS_BUF_ADDR(bp));
                 return error;
@@ -1502,7 +1437,7 @@ xlog_sync(xlog_t *log,
                 /* account for internal log which doesn't start at block #0 */
                 XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart);
                 XFS_BUF_WRITE(bp);
-                if ((error = XFS_bwrite(bp))) {
+                if ((error = xlog_bdstrat(bp))) {
                         xfs_ioerror_alert("xlog_sync (split)", log->l_mp,
                                           bp, XFS_BUF_ADDR(bp));
                         return error;
@@ -2854,7 +2789,6 @@ xlog_state_switch_iclogs(xlog_t *log,
         log->l_iclog = iclog->ic_next;
 }       /* xlog_state_switch_iclogs */

-
 /*
  * Write out all data in the in-core log as of this exact moment in time.
  *
@@ -2882,11 +2816,17 @@ xlog_state_switch_iclogs(xlog_t *log,
  *      b) when we return from flushing out this iclog, it is still
  *              not in the active nor dirty state.
  */
-STATIC int
-xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed)
+int
+_xfs_log_force(
+        struct xfs_mount        *mp,
+        uint                    flags,
+        int                     *log_flushed)
 {
-        xlog_in_core_t  *iclog;
-        xfs_lsn_t       lsn;
+        struct log              *log = mp->m_log;
+        struct xlog_in_core     *iclog;
+        xfs_lsn_t               lsn;
+
+        XFS_STATS_INC(xs_log_force);

         spin_lock(&log->l_icloglock);

@@ -2932,7 +2872,9 @@ xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed)

                         if (xlog_state_release_iclog(log, iclog))
                                 return XFS_ERROR(EIO);
-                        *log_flushed = 1;
+
+                        if (log_flushed)
+                                *log_flushed = 1;
                         spin_lock(&log->l_icloglock);
                         if (be64_to_cpu(iclog->ic_header.h_lsn) == lsn &&
                             iclog->ic_state != XLOG_STATE_DIRTY)
@@ -2976,19 +2918,37 @@ maybe_sleep:
                  */
                 if (iclog->ic_state & XLOG_STATE_IOERROR)
                         return XFS_ERROR(EIO);
-                *log_flushed = 1;
-
+                if (log_flushed)
+                        *log_flushed = 1;
         } else {

 no_sleep:
                 spin_unlock(&log->l_icloglock);
         }
         return 0;
-}       /* xlog_state_sync_all */
+}

+/*
+ * Wrapper for _xfs_log_force(), to be used when caller doesn't care
+ * about errors or whether the log was flushed or not. This is the normal
+ * interface to use when trying to unpin items or move the log forward.
+ */
+void
+xfs_log_force(
+        xfs_mount_t     *mp,
+        uint            flags)
+{
+        int     error;
+
+        error = _xfs_log_force(mp, flags, NULL);
+        if (error) {
+                xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: "
+                        "error %d returned.", error);
+        }
+}

 /*
- * Used by code which implements synchronous log forces.
+ * Force the in-core log to disk for a specific LSN.
  *
  * Find in-core log with lsn.
  * If it is in the DIRTY state, just return.
@@ -2996,109 +2956,142 @@ no_sleep:
  *              state and go to sleep or return.
  * If it is in any other state, go to sleep or return.
  *
- * If filesystem activity goes to zero, the iclog will get flushed only by
- * bdflush().
+ * Synchronous forces are implemented with a signal variable. All callers
+ * to force a given lsn to disk will wait on a the sv attached to the
+ * specific in-core log.  When given in-core log finally completes its
+ * write to disk, that thread will wake up all threads waiting on the
+ * sv.
  */
-STATIC int
-xlog_state_sync(xlog_t    *log,
-                xfs_lsn_t lsn,
-                uint      flags,
-                int       *log_flushed)
-{
-        xlog_in_core_t  *iclog;
-        int             already_slept = 0;
-
-try_again:
-        spin_lock(&log->l_icloglock);
-        iclog = log->l_iclog;
-
-        if (iclog->ic_state & XLOG_STATE_IOERROR) {
-                spin_unlock(&log->l_icloglock);
-                return XFS_ERROR(EIO);
-        }
-
-        do {
-                if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) {
-                        iclog = iclog->ic_next;
-                        continue;
-                }
-
-                if (iclog->ic_state == XLOG_STATE_DIRTY) {
-                        spin_unlock(&log->l_icloglock);
-                        return 0;
-                }
-
-                if (iclog->ic_state == XLOG_STATE_ACTIVE) {
-                        /*
-                         * We sleep here if we haven't already slept (e.g.
-                         * this is the first time we've looked at the correct
-                         * iclog buf) and the buffer before us is going to
-                         * be sync'ed. The reason for this is that if we
-                         * are doing sync transactions here, by waiting for
-                         * the previous I/O to complete, we can allow a few
-                         * more transactions into this iclog before we close
-                         * it down.
-                         *
-                         * Otherwise, we mark the buffer WANT_SYNC, and bump
-                         * up the refcnt so we can release the log (which drops
-                         * the ref count). The state switch keeps new transaction
-                         * commits from using this buffer. When the current commits
-                         * finish writing into the buffer, the refcount will drop to
-                         * zero and the buffer will go out then.
-                         */
-                        if (!already_slept &&
-                            (iclog->ic_prev->ic_state & (XLOG_STATE_WANT_SYNC |
-                                                         XLOG_STATE_SYNCING))) {
-                                ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR));
-                                XFS_STATS_INC(xs_log_force_sleep);
-                                sv_wait(&iclog->ic_prev->ic_write_wait, PSWP,
-                                        &log->l_icloglock, s);
-                                *log_flushed = 1;
-                                already_slept = 1;
-                                goto try_again;
-                        } else {
-                                atomic_inc(&iclog->ic_refcnt);
-                                xlog_state_switch_iclogs(log, iclog, 0);
-                                spin_unlock(&log->l_icloglock);
-                                if (xlog_state_release_iclog(log, iclog))
-                                        return XFS_ERROR(EIO);
-                                *log_flushed = 1;
-                                spin_lock(&log->l_icloglock);
-                        }
-                }
-
-                if ((flags & XFS_LOG_SYNC) && /* sleep */
-                    !(iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) {
-
-                        /*
-                         * Don't wait on completion if we know that we've
-                         * gotten a log write error.
-                         */
-                        if (iclog->ic_state & XLOG_STATE_IOERROR) {
-                                spin_unlock(&log->l_icloglock);
-                                return XFS_ERROR(EIO);
-                        }
-                        XFS_STATS_INC(xs_log_force_sleep);
-                        sv_wait(&iclog->ic_force_wait, PSWP, &log->l_icloglock, s);
-                        /*
-                         * No need to grab the log lock here since we're
-                         * only deciding whether or not to return EIO
-                         * and the memory read should be atomic.
-                         */
-                        if (iclog->ic_state & XLOG_STATE_IOERROR)
-                                return XFS_ERROR(EIO);
-                        *log_flushed = 1;
-                } else {                /* just return */
-                        spin_unlock(&log->l_icloglock);
-                }
-                return 0;
-
-        } while (iclog != log->l_iclog);
-
-        spin_unlock(&log->l_icloglock);
-        return 0;
-}       /* xlog_state_sync */
-
+int
+_xfs_log_force_lsn(
+        struct xfs_mount        *mp,
+        xfs_lsn_t               lsn,
+        uint                    flags,
+        int                     *log_flushed)
+{
+        struct log              *log = mp->m_log;
+        struct xlog_in_core     *iclog;
+        int                     already_slept = 0;
+
+        ASSERT(lsn != 0);
+
+        XFS_STATS_INC(xs_log_force);
+
+try_again:
+        spin_lock(&log->l_icloglock);
+        iclog = log->l_iclog;
+        if (iclog->ic_state & XLOG_STATE_IOERROR) {
+                spin_unlock(&log->l_icloglock);
+                return XFS_ERROR(EIO);
+        }
+
+        do {
+                if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) {
+                        iclog = iclog->ic_next;
+                        continue;
+                }
+
+                if (iclog->ic_state == XLOG_STATE_DIRTY) {
+                        spin_unlock(&log->l_icloglock);
+                        return 0;
+                }
+
+                if (iclog->ic_state == XLOG_STATE_ACTIVE) {
+                        /*
+                         * We sleep here if we haven't already slept (e.g.
+                         * this is the first time we've looked at the correct
+                         * iclog buf) and the buffer before us is going to
+                         * be sync'ed. The reason for this is that if we
+                         * are doing sync transactions here, by waiting for
+                         * the previous I/O to complete, we can allow a few
+                         * more transactions into this iclog before we close
+                         * it down.
+                         *
+                         * Otherwise, we mark the buffer WANT_SYNC, and bump
+                         * up the refcnt so we can release the log (which
+                         * drops the ref count). The state switch keeps new
+                         * transaction commits from using this buffer. When
+                         * the current commits finish writing into the buffer,
+                         * the refcount will drop to zero and the buffer will
+                         * go out then.
+                         */
+                        if (!already_slept &&
+                            (iclog->ic_prev->ic_state &
+                             (XLOG_STATE_WANT_SYNC | XLOG_STATE_SYNCING))) {
+                                ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR));
+
+                                XFS_STATS_INC(xs_log_force_sleep);
+
+                                sv_wait(&iclog->ic_prev->ic_write_wait,
+                                        PSWP, &log->l_icloglock, s);
+                                if (log_flushed)
+                                        *log_flushed = 1;
+                                already_slept = 1;
+                                goto try_again;
+                        }
+                        atomic_inc(&iclog->ic_refcnt);
+                        xlog_state_switch_iclogs(log, iclog, 0);
+                        spin_unlock(&log->l_icloglock);
+                        if (xlog_state_release_iclog(log, iclog))
+                                return XFS_ERROR(EIO);
+                        if (log_flushed)
+                                *log_flushed = 1;
+                        spin_lock(&log->l_icloglock);
+                }
+
+                if ((flags & XFS_LOG_SYNC) && /* sleep */
+                    !(iclog->ic_state &
+                      (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) {
+                        /*
+                         * Don't wait on completion if we know that we've
+                         * gotten a log write error.
+                         */
+                        if (iclog->ic_state & XLOG_STATE_IOERROR) {
+                                spin_unlock(&log->l_icloglock);
+                                return XFS_ERROR(EIO);
+                        }
+                        XFS_STATS_INC(xs_log_force_sleep);
+                        sv_wait(&iclog->ic_force_wait, PSWP, &log->l_icloglock, s);
+                        /*
+                         * No need to grab the log lock here since we're
+                         * only deciding whether or not to return EIO
+                         * and the memory read should be atomic.
+                         */
+                        if (iclog->ic_state & XLOG_STATE_IOERROR)
+                                return XFS_ERROR(EIO);
+
+                        if (log_flushed)
+                                *log_flushed = 1;
+                } else {                /* just return */
+                        spin_unlock(&log->l_icloglock);
+                }
+
+                return 0;
+        } while (iclog != log->l_iclog);
+
+        spin_unlock(&log->l_icloglock);
+        return 0;
+}
+
+/*
+ * Wrapper for _xfs_log_force_lsn(), to be used when caller doesn't care
+ * about errors or whether the log was flushed or not. This is the normal
+ * interface to use when trying to unpin items or move the log forward.
+ */
+void
+xfs_log_force_lsn(
+        xfs_mount_t     *mp,
+        xfs_lsn_t       lsn,
+        uint            flags)
+{
+        int     error;
+
+        error = _xfs_log_force_lsn(mp, lsn, flags, NULL);
+        if (error) {
+                xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: "
+                        "error %d returned.", error);
+        }
+}

 /*
  * Called when we want to mark the current iclog as being ready to sync to
@@ -3463,7 +3456,6 @@ xfs_log_force_umount(
         xlog_ticket_t   *tic;
         xlog_t          *log;
         int             retval;
-        int             dummy;

         log = mp->m_log;

@@ -3537,13 +3529,14 @@ xfs_log_force_umount(
         }
         spin_unlock(&log->l_grant_lock);

-        if (! (log->l_iclog->ic_state & XLOG_STATE_IOERROR)) {
+        if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) {
                 ASSERT(!logerror);
                 /*
                  * Force the incore logs to disk before shutting the
                  * log down completely.
                  */
-                xlog_state_sync_all(log, XFS_LOG_FORCE|XFS_LOG_SYNC, &dummy);
+                _xfs_log_force(mp, XFS_LOG_SYNC, NULL);
+
                 spin_lock(&log->l_icloglock);
                 retval = xlog_state_ioerror(log);
                 spin_unlock(&log->l_icloglock);
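
Not part of the patch: the diffstat above is limited to fs/xfs/xfs_log.c, so the matching declaration change in the log header is not shown. Judging purely from the function definitions in the hunks above, the exported interface after this patch would presumably read:

/* Sketch of the resulting prototypes, inferred from the definitions above. */
int     _xfs_log_force(struct xfs_mount *mp, uint flags, int *log_flushed);
void    xfs_log_force(xfs_mount_t *mp, uint flags);
int     _xfs_log_force_lsn(struct xfs_mount *mp, xfs_lsn_t lsn, uint flags,
                           int *log_flushed);
void    xfs_log_force_lsn(xfs_mount_t *mp, xfs_lsn_t lsn, uint flags);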