author     Dave Chinner <david@fromorbit.com>  2014-10-01 19:11:14 -0400
committer  Dave Chinner <david@fromorbit.com>  2014-10-01 19:11:14 -0400
commit     75e58ce4c8f354f1a68a8bb8a9692827cdaf3d21 (patch)
tree       4bbcd993a6e75d199c82958ffe15c0e0d03f4bbf
parent     bd438f825f7badafe56d117ed906488c8541f95f (diff)
parent     8c15612546bce1ecafb7dee3cce8a2a9b560e15e (diff)
Merge branch 'xfs-buf-iosubmit' into for-next
 fs/xfs/xfs_bmap_util.c   |  56
 fs/xfs/xfs_buf.c         | 353
 fs/xfs/xfs_buf.h         |  15
 fs/xfs/xfs_buf_item.c    |   8
 fs/xfs/xfs_fsops.c       |  11
 fs/xfs/xfs_inode.c       |   2
 fs/xfs/xfs_log.c         |  59
 fs/xfs/xfs_log_recover.c |  32
 fs/xfs/xfs_mount.c       |  55
 fs/xfs/xfs_rtalloc.c     |  30
 fs/xfs/xfs_trace.h       |   3
 fs/xfs/xfs_trans_buf.c   |  16
 12 files changed, 282 insertions(+), 358 deletions(-)
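This merge pulls in the xfs-buf-iosubmit series, which replaces the two-step
xfs_buf_iorequest()/xfs_buf_iowait() buffer submission interface with
xfs_buf_submit() for asynchronous IO and xfs_buf_submit_wait() for synchronous
IO, captures bio errors in a new b_io_error field, and converts
xfs_buf_read_uncached() to return an error code. As a rough before/after
sketch of a synchronous caller (distilled from the hunks below, not itself
part of the diff):

	/* before: the shutdown check and the wait were the caller's problem */
	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;
	xfs_buf_iorequest(bp);
	error = xfs_buf_iowait(bp);

	/* after: one call handles shutdown, submission and waiting */
	error = xfs_buf_submit_wait(bp);
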
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index d8b77b5bf4d9..c2aaa58e59ee 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1122,14 +1122,6 @@ xfs_zero_remaining_bytes(
 	if (endoff > XFS_ISIZE(ip))
 		endoff = XFS_ISIZE(ip);
 
-	bp = xfs_buf_get_uncached(XFS_IS_REALTIME_INODE(ip) ?
-			mp->m_rtdev_targp : mp->m_ddev_targp,
-			BTOBB(mp->m_sb.sb_blocksize), 0);
-	if (!bp)
-		return -ENOMEM;
-
-	xfs_buf_unlock(bp);
-
 	for (offset = startoff; offset <= endoff; offset = lastoffset + 1) {
 		uint lock_mode;
 
@@ -1152,42 +1144,24 @@ xfs_zero_remaining_bytes(
 		ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
 		if (imap.br_state == XFS_EXT_UNWRITTEN)
 			continue;
-		XFS_BUF_UNDONE(bp);
-		XFS_BUF_UNWRITE(bp);
-		XFS_BUF_READ(bp);
-		XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock));
 
-		if (XFS_FORCED_SHUTDOWN(mp)) {
-			error = -EIO;
-			break;
-		}
-		xfs_buf_iorequest(bp);
-		error = xfs_buf_iowait(bp);
-		if (error) {
-			xfs_buf_ioerror_alert(bp,
-					"xfs_zero_remaining_bytes(read)");
-			break;
-		}
+		error = xfs_buf_read_uncached(XFS_IS_REALTIME_INODE(ip) ?
+				mp->m_rtdev_targp : mp->m_ddev_targp,
+				xfs_fsb_to_db(ip, imap.br_startblock),
+				BTOBB(mp->m_sb.sb_blocksize),
+				0, &bp, NULL);
+		if (error)
+			return error;
+
 		memset(bp->b_addr +
 			(offset - XFS_FSB_TO_B(mp, imap.br_startoff)),
 		       0, lastoffset - offset + 1);
-		XFS_BUF_UNDONE(bp);
-		XFS_BUF_UNREAD(bp);
-		XFS_BUF_WRITE(bp);
 
-		if (XFS_FORCED_SHUTDOWN(mp)) {
-			error = -EIO;
-			break;
-		}
-		xfs_buf_iorequest(bp);
-		error = xfs_buf_iowait(bp);
-		if (error) {
-			xfs_buf_ioerror_alert(bp,
-					"xfs_zero_remaining_bytes(write)");
-			break;
-		}
+		error = xfs_bwrite(bp);
+		xfs_buf_relse(bp);
+		if (error)
+			return error;
 	}
-	xfs_buf_free(bp);
 	return error;
 }
 
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index ec6505056b2c..017b6afe340b 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -623,10 +623,11 @@ _xfs_buf_read(
 	bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD);
 	bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD);
 
-	xfs_buf_iorequest(bp);
-	if (flags & XBF_ASYNC)
+	if (flags & XBF_ASYNC) {
+		xfs_buf_submit(bp);
 		return 0;
-	return xfs_buf_iowait(bp);
+	}
+	return xfs_buf_submit_wait(bp);
 }
 
 xfs_buf_t *
@@ -687,34 +688,39 @@ xfs_buf_readahead_map(
  * Read an uncached buffer from disk. Allocates and returns a locked
  * buffer containing the disk contents or nothing.
  */
-struct xfs_buf *
+int
 xfs_buf_read_uncached(
 	struct xfs_buftarg	*target,
 	xfs_daddr_t		daddr,
 	size_t			numblks,
 	int			flags,
+	struct xfs_buf		**bpp,
 	const struct xfs_buf_ops *ops)
 {
 	struct xfs_buf		*bp;
 
+	*bpp = NULL;
+
 	bp = xfs_buf_get_uncached(target, numblks, flags);
 	if (!bp)
-		return NULL;
+		return -ENOMEM;
 
 	/* set up the buffer for a read IO */
 	ASSERT(bp->b_map_count == 1);
-	bp->b_bn = daddr;
+	bp->b_bn = XFS_BUF_DADDR_NULL;  /* always null for uncached buffers */
 	bp->b_maps[0].bm_bn = daddr;
 	bp->b_flags |= XBF_READ;
 	bp->b_ops = ops;
 
-	if (XFS_FORCED_SHUTDOWN(target->bt_mount)) {
+	xfs_buf_submit_wait(bp);
+	if (bp->b_error) {
+		int	error = bp->b_error;
 		xfs_buf_relse(bp);
-		return NULL;
+		return error;
 	}
-	xfs_buf_iorequest(bp);
-	xfs_buf_iowait(bp);
-	return bp;
+
+	*bpp = bp;
+	return 0;
 }
 
 /*
@@ -998,53 +1004,56 @@ xfs_buf_wait_unpin(
  * Buffer Utility Routines
  */
 
-STATIC void
-xfs_buf_iodone_work(
-	struct work_struct	*work)
+void
+xfs_buf_ioend(
+	struct xfs_buf	*bp)
 {
-	struct xfs_buf		*bp =
-		container_of(work, xfs_buf_t, b_iodone_work);
-	bool			read = !!(bp->b_flags & XBF_READ);
+	bool		read = bp->b_flags & XBF_READ;
+
+	trace_xfs_buf_iodone(bp, _RET_IP_);
 
 	bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD);
 
-	/* only validate buffers that were read without errors */
-	if (read && bp->b_ops && !bp->b_error && (bp->b_flags & XBF_DONE))
+	/*
+	 * Pull in IO completion errors now. We are guaranteed to be running
+	 * single threaded, so we don't need the lock to read b_io_error.
+	 */
+	if (!bp->b_error && bp->b_io_error)
+		xfs_buf_ioerror(bp, bp->b_io_error);
+
+	/* Only validate buffers that were read without errors */
+	if (read && !bp->b_error && bp->b_ops) {
+		ASSERT(!bp->b_iodone);
 		bp->b_ops->verify_read(bp);
+	}
+
+	if (!bp->b_error)
+		bp->b_flags |= XBF_DONE;
 
 	if (bp->b_iodone)
 		(*(bp->b_iodone))(bp);
 	else if (bp->b_flags & XBF_ASYNC)
 		xfs_buf_relse(bp);
-	else {
-		ASSERT(read && bp->b_ops);
+	else
 		complete(&bp->b_iowait);
-	}
 }
 
-void
-xfs_buf_ioend(
-	struct xfs_buf	*bp,
-	int		schedule)
+static void
+xfs_buf_ioend_work(
+	struct work_struct	*work)
 {
-	bool		read = !!(bp->b_flags & XBF_READ);
-
-	trace_xfs_buf_iodone(bp, _RET_IP_);
+	struct xfs_buf		*bp =
+		container_of(work, xfs_buf_t, b_iodone_work);
 
-	if (bp->b_error == 0)
-		bp->b_flags |= XBF_DONE;
+	xfs_buf_ioend(bp);
+}
 
-	if (bp->b_iodone || (read && bp->b_ops) || (bp->b_flags & XBF_ASYNC)) {
-		if (schedule) {
-			INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work);
-			queue_work(xfslogd_workqueue, &bp->b_iodone_work);
-		} else {
-			xfs_buf_iodone_work(&bp->b_iodone_work);
-		}
-	} else {
-		bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD);
-		complete(&bp->b_iowait);
-	}
+void
+xfs_buf_ioend_async(
+	struct xfs_buf	*bp)
+{
+	INIT_WORK(&bp->b_iodone_work, xfs_buf_ioend_work);
+	queue_work(xfslogd_workqueue, &bp->b_iodone_work);
 }
 
 void
@@ -1067,96 +1076,6 @@ xfs_buf_ioerror_alert(
 		(__uint64_t)XFS_BUF_ADDR(bp), func, -bp->b_error, bp->b_length);
 }
 
-/*
- * Called when we want to stop a buffer from getting written or read.
- * We attach the EIO error, muck with its flags, and call xfs_buf_ioend
- * so that the proper iodone callbacks get called.
- */
-STATIC int
-xfs_bioerror(
-	xfs_buf_t *bp)
-{
-#ifdef XFSERRORDEBUG
-	ASSERT(XFS_BUF_ISREAD(bp) || bp->b_iodone);
-#endif
-
-	/*
-	 * No need to wait until the buffer is unpinned, we aren't flushing it.
-	 */
-	xfs_buf_ioerror(bp, -EIO);
-
-	/*
-	 * We're calling xfs_buf_ioend, so delete XBF_DONE flag.
-	 */
-	XFS_BUF_UNREAD(bp);
-	XFS_BUF_UNDONE(bp);
-	xfs_buf_stale(bp);
-
-	xfs_buf_ioend(bp, 0);
-
-	return -EIO;
-}
-
-/*
- * Same as xfs_bioerror, except that we are releasing the buffer
- * here ourselves, and avoiding the xfs_buf_ioend call.
- * This is meant for userdata errors; metadata bufs come with
- * iodone functions attached, so that we can track down errors.
- */
-int
-xfs_bioerror_relse(
-	struct xfs_buf	*bp)
-{
-	int64_t		fl = bp->b_flags;
-	/*
-	 * No need to wait until the buffer is unpinned.
-	 * We aren't flushing it.
-	 *
-	 * chunkhold expects B_DONE to be set, whether
-	 * we actually finish the I/O or not. We don't want to
-	 * change that interface.
-	 */
-	XFS_BUF_UNREAD(bp);
-	XFS_BUF_DONE(bp);
-	xfs_buf_stale(bp);
-	bp->b_iodone = NULL;
-	if (!(fl & XBF_ASYNC)) {
-		/*
-		 * Mark b_error and B_ERROR _both_.
-		 * Lot's of chunkcache code assumes that.
-		 * There's no reason to mark error for
-		 * ASYNC buffers.
-		 */
-		xfs_buf_ioerror(bp, -EIO);
-		complete(&bp->b_iowait);
-	} else {
-		xfs_buf_relse(bp);
-	}
-
-	return -EIO;
-}
-
-STATIC int
-xfs_bdstrat_cb(
-	struct xfs_buf	*bp)
-{
-	if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
-		trace_xfs_bdstrat_shut(bp, _RET_IP_);
-		/*
-		 * Metadata write that didn't get logged but
-		 * written delayed anyway. These aren't associated
-		 * with a transaction, and can be ignored.
-		 */
-		if (!bp->b_iodone && !XFS_BUF_ISREAD(bp))
-			return xfs_bioerror_relse(bp);
-		else
-			return xfs_bioerror(bp);
-	}
-
-	xfs_buf_iorequest(bp);
-	return 0;
-}
-
 int
 xfs_bwrite(
 	struct xfs_buf		*bp)
@@ -1166,11 +1085,10 @@ xfs_bwrite(
 	ASSERT(xfs_buf_islocked(bp));
 
 	bp->b_flags |= XBF_WRITE;
-	bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q | XBF_WRITE_FAIL);
+	bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q |
+			 XBF_WRITE_FAIL | XBF_DONE);
 
-	xfs_bdstrat_cb(bp);
-
-	error = xfs_buf_iowait(bp);
+	error = xfs_buf_submit_wait(bp);
 	if (error) {
 		xfs_force_shutdown(bp->b_target->bt_mount,
 				   SHUTDOWN_META_IO_ERROR);
@@ -1179,15 +1097,6 @@ xfs_bwrite(
 }
 
 STATIC void
-_xfs_buf_ioend(
-	xfs_buf_t	*bp,
-	int		schedule)
-{
-	if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
-		xfs_buf_ioend(bp, schedule);
-}
-
-STATIC void
 xfs_buf_bio_end_io(
 	struct bio		*bio,
 	int			error)
@@ -1198,13 +1107,18 @@ xfs_buf_bio_end_io(
 	 * don't overwrite existing errors - otherwise we can lose errors on
 	 * buffers that require multiple bios to complete.
 	 */
-	if (!bp->b_error)
-		xfs_buf_ioerror(bp, error);
+	if (error) {
+		spin_lock(&bp->b_lock);
+		if (!bp->b_io_error)
+			bp->b_io_error = error;
+		spin_unlock(&bp->b_lock);
+	}
 
 	if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
 		invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
 
-	_xfs_buf_ioend(bp, 1);
+	if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
+		xfs_buf_ioend_async(bp);
 	bio_put(bio);
 }
 
@@ -1283,7 +1197,7 @@ next_chunk:
 	} else {
 		/*
 		 * This is guaranteed not to be the last io reference count
-		 * because the caller (xfs_buf_iorequest) holds a count itself.
+		 * because the caller (xfs_buf_submit) holds a count itself.
 		 */
 		atomic_dec(&bp->b_io_remaining);
 		xfs_buf_ioerror(bp, -EIO);
@@ -1373,53 +1287,131 @@ _xfs_buf_ioapply(
 	blk_finish_plug(&plug);
 }
 
+/*
+ * Asynchronous IO submission path. This transfers the buffer lock ownership
+ * and the current reference to the IO. It is not safe to reference the buffer
+ * after a call to this function unless the caller holds an additional
+ * reference itself.
+ */
 void
-xfs_buf_iorequest(
-	xfs_buf_t		*bp)
+xfs_buf_submit(
+	struct xfs_buf	*bp)
 {
-	trace_xfs_buf_iorequest(bp, _RET_IP_);
+	trace_xfs_buf_submit(bp, _RET_IP_);
 
 	ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
+	ASSERT(bp->b_flags & XBF_ASYNC);
+
+	/* on shutdown we stale and complete the buffer immediately */
+	if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
+		xfs_buf_ioerror(bp, -EIO);
+		bp->b_flags &= ~XBF_DONE;
+		xfs_buf_stale(bp);
+		xfs_buf_ioend(bp);
+		return;
+	}
 
 	if (bp->b_flags & XBF_WRITE)
 		xfs_buf_wait_unpin(bp);
+
+	/* clear the internal error state to avoid spurious errors */
+	bp->b_io_error = 0;
+
+	/*
+	 * The caller's reference is released during I/O completion.
+	 * This occurs some time after the last b_io_remaining reference is
+	 * released, so after we drop our IO reference we have to have some
+	 * other reference to ensure the buffer doesn't go away from underneath
+	 * us. Take a direct reference to ensure we have safe access to the
+	 * buffer until we are finished with it.
+	 */
 	xfs_buf_hold(bp);
 
 	/*
-	 * Set the count to 1 initially, this will stop an I/O
-	 * completion callout which happens before we have started
-	 * all the I/O from calling xfs_buf_ioend too early.
+	 * Set the count to 1 initially, this will stop an I/O completion
+	 * callout which happens before we have started all the I/O from
+	 * calling xfs_buf_ioend too early.
 	 */
 	atomic_set(&bp->b_io_remaining, 1);
 	_xfs_buf_ioapply(bp);
+
 	/*
-	 * If _xfs_buf_ioapply failed, we'll get back here with
-	 * only the reference we took above.  _xfs_buf_ioend will
-	 * drop it to zero, so we'd better not queue it for later,
-	 * or we'll free it before it's done.
+	 * If _xfs_buf_ioapply failed, we can get back here with only the IO
+	 * reference we took above. If we drop it to zero, run completion so
+	 * that we don't return to the caller with completion still pending.
 	 */
-	_xfs_buf_ioend(bp, bp->b_error ? 0 : 1);
+	if (atomic_dec_and_test(&bp->b_io_remaining) == 1) {
+		if (bp->b_error)
+			xfs_buf_ioend(bp);
+		else
+			xfs_buf_ioend_async(bp);
+	}
 
 	xfs_buf_rele(bp);
+	/* Note: it is not safe to reference bp now we've dropped our ref */
 }
 
 /*
- * Waits for I/O to complete on the buffer supplied.  It returns immediately if
- * no I/O is pending or there is already a pending error on the buffer, in which
- * case nothing will ever complete.  It returns the I/O error code, if any, or
- * 0 if there was no error.
+ * Synchronous buffer IO submission path, read or write.
  */
 int
-xfs_buf_iowait(
-	xfs_buf_t		*bp)
+xfs_buf_submit_wait(
+	struct xfs_buf	*bp)
 {
-	trace_xfs_buf_iowait(bp, _RET_IP_);
+	int		error;
 
-	if (!bp->b_error)
-		wait_for_completion(&bp->b_iowait);
+	trace_xfs_buf_submit_wait(bp, _RET_IP_);
+
+	ASSERT(!(bp->b_flags & (_XBF_DELWRI_Q | XBF_ASYNC)));
+
+	if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
+		xfs_buf_ioerror(bp, -EIO);
+		xfs_buf_stale(bp);
+		bp->b_flags &= ~XBF_DONE;
+		return -EIO;
+	}
+
+	if (bp->b_flags & XBF_WRITE)
+		xfs_buf_wait_unpin(bp);
+
+	/* clear the internal error state to avoid spurious errors */
+	bp->b_io_error = 0;
+
+	/*
+	 * For synchronous IO, the IO does not inherit the submitter's
+	 * reference count, nor the buffer lock. Hence we cannot release the
+	 * reference we are about to take until we've waited for all IO
+	 * completion to occur, including any xfs_buf_ioend_async() work that
+	 * may be pending.
+	 */
+	xfs_buf_hold(bp);
+
+	/*
+	 * Set the count to 1 initially, this will stop an I/O completion
+	 * callout which happens before we have started all the I/O from
+	 * calling xfs_buf_ioend too early.
+	 */
+	atomic_set(&bp->b_io_remaining, 1);
+	_xfs_buf_ioapply(bp);
+
+	/*
+	 * make sure we run completion synchronously if it raced with us and is
+	 * already complete.
+	 */
+	if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
+		xfs_buf_ioend(bp);
 
+	/* wait for completion before gathering the error from the buffer */
+	trace_xfs_buf_iowait(bp, _RET_IP_);
+	wait_for_completion(&bp->b_iowait);
 	trace_xfs_buf_iowait_done(bp, _RET_IP_);
-	return bp->b_error;
+	error = bp->b_error;
+
+	/*
+	 * all done now, we can release the hold that keeps the buffer
+	 * referenced for the entire IO.
+	 */
+	xfs_buf_rele(bp);
+	return error;
 }
 
 xfs_caddr_t
@@ -1813,13 +1805,19 @@ __xfs_buf_delwri_submit(
 	blk_start_plug(&plug);
 	list_for_each_entry_safe(bp, n, io_list, b_list) {
 		bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC | XBF_WRITE_FAIL);
-		bp->b_flags |= XBF_WRITE;
+		bp->b_flags |= XBF_WRITE | XBF_ASYNC;
 
-		if (!wait) {
-			bp->b_flags |= XBF_ASYNC;
+		/*
+		 * We do all IO submission async. This means if we need to wait
+		 * for IO completion we need to take an extra reference so the
+		 * buffer is still valid on the other side.
+		 */
+		if (wait)
+			xfs_buf_hold(bp);
+		else
 			list_del_init(&bp->b_list);
-		}
-		xfs_bdstrat_cb(bp);
+
+		xfs_buf_submit(bp);
 	}
 	blk_finish_plug(&plug);
 
@@ -1866,7 +1864,10 @@ xfs_buf_delwri_submit(
 		bp = list_first_entry(&io_list, struct xfs_buf, b_list);
 
 		list_del_init(&bp->b_list);
-		error2 = xfs_buf_iowait(bp);
+
+		/* locking the buffer will wait for async IO completion. */
+		xfs_buf_lock(bp);
+		error2 = bp->b_error;
 		xfs_buf_relse(bp);
 		if (!error)
 			error = error2;
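
Note the delwri path above now submits everything async; a waiter takes an
extra hold before submission and later uses buffer lock acquisition as the
wait, since the lock cannot be taken until IO completion releases it. A
condensed sketch of that pattern, inferred from the two hunks above:

	xfs_buf_hold(bp);	/* keep bp valid across the async IO */
	xfs_buf_submit(bp);	/* lock and IO reference pass to the IO */
	...
	xfs_buf_lock(bp);	/* blocks until IO completion unlocks bp */
	error = bp->b_error;
	xfs_buf_relse(bp);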
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index c753183900b3..82002c00af90 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -158,6 +158,7 @@ typedef struct xfs_buf {
 	struct list_head	b_lru;		/* lru list */
 	spinlock_t		b_lock;		/* internal state lock */
 	unsigned int		b_state;	/* internal state flags */
+	int			b_io_error;	/* internal IO error state */
 	wait_queue_head_t	b_waiters;	/* unpin waiters */
 	struct list_head	b_list;
 	struct xfs_perag	*b_pag;		/* contains rbtree root */
@@ -268,9 +269,9 @@ int xfs_buf_associate_memory(struct xfs_buf *bp, void *mem, size_t length);
 
 struct xfs_buf *xfs_buf_get_uncached(struct xfs_buftarg *target, size_t numblks,
 				int flags);
-struct xfs_buf *xfs_buf_read_uncached(struct xfs_buftarg *target,
-				xfs_daddr_t daddr, size_t numblks, int flags,
-				const struct xfs_buf_ops *ops);
+int xfs_buf_read_uncached(struct xfs_buftarg *target, xfs_daddr_t daddr,
+			  size_t numblks, int flags, struct xfs_buf **bpp,
+			  const struct xfs_buf_ops *ops);
 void xfs_buf_hold(struct xfs_buf *bp);
 
 /* Releasing Buffers */
@@ -286,18 +287,16 @@ extern void xfs_buf_unlock(xfs_buf_t *);
 
 /* Buffer Read and Write Routines */
 extern int xfs_bwrite(struct xfs_buf *bp);
-extern void xfs_buf_ioend(xfs_buf_t *,	int);
+extern void xfs_buf_ioend(struct xfs_buf *bp);
 extern void xfs_buf_ioerror(xfs_buf_t *, int);
 extern void xfs_buf_ioerror_alert(struct xfs_buf *, const char *func);
-extern void xfs_buf_iorequest(xfs_buf_t *);
-extern int xfs_buf_iowait(xfs_buf_t *);
+extern void xfs_buf_submit(struct xfs_buf *bp);
+extern int xfs_buf_submit_wait(struct xfs_buf *bp);
 extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
 				xfs_buf_rw_t);
 #define xfs_buf_zero(bp, off, len) \
 	xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
 
-extern int xfs_bioerror_relse(struct xfs_buf *);
-
 /* Buffer Utility Routines */
 extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t);
 
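
With the prototype change above, callers of xfs_buf_read_uncached() no longer
inspect bp->b_error or treat a NULL return as -EIO; allocation and IO failures
both come back as a single error code, and the buffer is returned through bpp
only on success. A typical converted call site looks roughly like this
(sketch; daddr and ops stand in for whatever the call site passes):

	struct xfs_buf	*bp;
	int		error;

	error = xfs_buf_read_uncached(mp->m_ddev_targp, daddr,
				      XFS_FSS_TO_BB(mp, 1), 0, &bp, ops);
	if (error)
		return error;	/* no buffer to release on failure */
	/* ... use bp->b_addr ... */
	xfs_buf_relse(bp);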
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 30fa5db9aea8..f15969543326 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -491,7 +491,7 @@ xfs_buf_item_unpin(
 		xfs_buf_ioerror(bp, -EIO);
 		XFS_BUF_UNDONE(bp);
 		xfs_buf_stale(bp);
-		xfs_buf_ioend(bp, 0);
+		xfs_buf_ioend(bp);
 	}
 }
 
@@ -1081,7 +1081,7 @@ xfs_buf_iodone_callbacks(
 	 * a way to shut the filesystem down if the writes keep failing.
 	 *
 	 * In practice we'll shut the filesystem down soon as non-transient
-	 * erorrs tend to affect the whole device and a failing log write
+	 * errors tend to affect the whole device and a failing log write
 	 * will make us give up. But we really ought to do better here.
 	 */
 	if (XFS_BUF_ISASYNC(bp)) {
@@ -1094,7 +1094,7 @@ xfs_buf_iodone_callbacks(
 		if (!(bp->b_flags & (XBF_STALE|XBF_WRITE_FAIL))) {
 			bp->b_flags |= XBF_WRITE | XBF_ASYNC |
 				       XBF_DONE | XBF_WRITE_FAIL;
-			xfs_buf_iorequest(bp);
+			xfs_buf_submit(bp);
 		} else {
 			xfs_buf_relse(bp);
 		}
@@ -1115,7 +1115,7 @@ do_callbacks:
 	xfs_buf_do_callbacks(bp);
 	bp->b_fspriv = NULL;
 	bp->b_iodone = NULL;
-	xfs_buf_ioend(bp, 0);
+	xfs_buf_ioend(bp);
 }
 
 /*
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index f91de1ef05e1..c05ac8b70fa9 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -172,16 +172,11 @@ xfs_growfs_data_private(
 	if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb)))
 		return error;
 	dpct = pct - mp->m_sb.sb_imax_pct;
-	bp = xfs_buf_read_uncached(mp->m_ddev_targp,
+	error = xfs_buf_read_uncached(mp->m_ddev_targp,
 				XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1),
-				XFS_FSS_TO_BB(mp, 1), 0, NULL);
-	if (!bp)
-		return -EIO;
-	if (bp->b_error) {
-		error = bp->b_error;
-		xfs_buf_relse(bp);
+				XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL);
+	if (error)
 		return error;
-	}
 	xfs_buf_relse(bp);
 
 	new = nb;	/* use new as a temporary here */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index c92cb48617d1..e5bbc1f30f16 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -3062,7 +3062,7 @@ cluster_corrupt_out:
 		XFS_BUF_UNDONE(bp);
 		xfs_buf_stale(bp);
 		xfs_buf_ioerror(bp, -EIO);
-		xfs_buf_ioend(bp, 0);
+		xfs_buf_ioend(bp);
 	} else {
 		xfs_buf_stale(bp);
 		xfs_buf_relse(bp);
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index ca4fd5bd8522..fe88ef67f93a 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1678,7 +1678,7 @@ xlog_bdstrat(
 	if (iclog->ic_state & XLOG_STATE_IOERROR) {
 		xfs_buf_ioerror(bp, -EIO);
 		xfs_buf_stale(bp);
-		xfs_buf_ioend(bp, 0);
+		xfs_buf_ioend(bp);
 		/*
 		 * It would seem logical to return EIO here, but we rely on
 		 * the log state machine to propagate I/O errors instead of
@@ -1688,7 +1688,7 @@ xlog_bdstrat(
 		return 0;
 	}
 
-	xfs_buf_iorequest(bp);
+	xfs_buf_submit(bp);
 	return 0;
 }
 
@@ -3867,18 +3867,17 @@ xlog_state_ioerror(
  * This is called from xfs_force_shutdown, when we're forcibly
  * shutting down the filesystem, typically because of an IO error.
  * Our main objectives here are to make sure that:
- *	a. the filesystem gets marked 'SHUTDOWN' for all interested
+ *	a. if !logerror, flush the logs to disk. Anything modified
+ *	   after this is ignored.
+ *	b. the filesystem gets marked 'SHUTDOWN' for all interested
  *	   parties to find out, 'atomically'.
- *	b. those who're sleeping on log reservations, pinned objects and
+ *	c. those who're sleeping on log reservations, pinned objects and
  *	   other resources get woken up, and be told the bad news.
- *	c. nothing new gets queued up after (a) and (b) are done.
- *	d. if !logerror, flush the iclogs to disk, then seal them off
- *	   for business.
+ *	d. nothing new gets queued up after (b) and (c) are done.
  *
- * Note: for delayed logging the !logerror case needs to flush the regions
- * held in memory out to the iclogs before flushing them to disk. This needs
- * to be done before the log is marked as shutdown, otherwise the flush to the
- * iclogs will fail.
+ * Note: for the !logerror case we need to flush the regions held in memory out
+ * to disk first. This needs to be done before the log is marked as shutdown,
+ * otherwise the iclog writes will fail.
  */
 int
 xfs_log_force_umount(
@@ -3910,16 +3909,16 @@ xfs_log_force_umount(
 		ASSERT(XLOG_FORCED_SHUTDOWN(log));
 		return 1;
 	}
-	retval = 0;
 
 	/*
-	 * Flush the in memory commit item list before marking the log as
-	 * being shut down. We need to do it in this order to ensure all the
-	 * completed transactions are flushed to disk with the xfs_log_force()
-	 * call below.
+	 * Flush all the completed transactions to disk before marking the log
+	 * being shut down. We need to do it in this order to ensure that
+	 * completed operations are safely on disk before we shut down, and
+	 * that we don't have to issue any buffer IO after the shutdown flags
+	 * are set to guarantee this.
 	 */
 	if (!logerror)
-		xlog_cil_force(log);
+		_xfs_log_force(mp, XFS_LOG_SYNC, NULL);
 
 	/*
 	 * mark the filesystem and the as in a shutdown state and wake
@@ -3931,18 +3930,11 @@ xfs_log_force_umount(
 	XFS_BUF_DONE(mp->m_sb_bp);
 
 	/*
-	 * This flag is sort of redundant because of the mount flag, but
-	 * it's good to maintain the separation between the log and the rest
-	 * of XFS.
+	 * Mark the log and the iclogs with IO error flags to prevent any
+	 * further log IO from being issued or completed.
 	 */
 	log->l_flags |= XLOG_IO_ERROR;
-
-	/*
-	 * If we hit a log error, we want to mark all the iclogs IOERROR
-	 * while we're still holding the loglock.
-	 */
-	if (logerror)
-		retval = xlog_state_ioerror(log);
+	retval = xlog_state_ioerror(log);
 	spin_unlock(&log->l_icloglock);
 
 	/*
@@ -3955,19 +3947,6 @@ xfs_log_force_umount(
 	xlog_grant_head_wake_all(&log->l_reserve_head);
 	xlog_grant_head_wake_all(&log->l_write_head);
 
-	if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) {
-		ASSERT(!logerror);
-		/*
-		 * Force the incore logs to disk before shutting the
-		 * log down completely.
-		 */
-		_xfs_log_force(mp, XFS_LOG_SYNC, NULL);
-
-		spin_lock(&log->l_icloglock);
-		retval = xlog_state_ioerror(log);
-		spin_unlock(&log->l_icloglock);
-	}
-
 	/*
 	 * Wake up everybody waiting on xfs_log_force. Wake the CIL push first
 	 * as if the log writes were completed. The abort handling in the log
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 79cfe7e6ec7a..00cd7f3a8f59 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -193,12 +193,8 @@ xlog_bread_noalign(
 	bp->b_io_length = nbblks;
 	bp->b_error = 0;
 
-	if (XFS_FORCED_SHUTDOWN(log->l_mp))
-		return -EIO;
-
-	xfs_buf_iorequest(bp);
-	error = xfs_buf_iowait(bp);
-	if (error)
+	error = xfs_buf_submit_wait(bp);
+	if (error && !XFS_FORCED_SHUTDOWN(log->l_mp))
 		xfs_buf_ioerror_alert(bp, __func__);
 	return error;
 }
@@ -378,12 +374,14 @@ xlog_recover_iodone(
 		 * We're not going to bother about retrying
 		 * this during recovery. One strike!
 		 */
-		xfs_buf_ioerror_alert(bp, __func__);
-		xfs_force_shutdown(bp->b_target->bt_mount,
-					SHUTDOWN_META_IO_ERROR);
+		if (!XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
+			xfs_buf_ioerror_alert(bp, __func__);
+			xfs_force_shutdown(bp->b_target->bt_mount,
+						SHUTDOWN_META_IO_ERROR);
+		}
 	}
 	bp->b_iodone = NULL;
-	xfs_buf_ioend(bp, 0);
+	xfs_buf_ioend(bp);
 }
 
 /*
@@ -4452,16 +4450,12 @@ xlog_do_recover(
 	XFS_BUF_UNASYNC(bp);
 	bp->b_ops = &xfs_sb_buf_ops;
 
-	if (XFS_FORCED_SHUTDOWN(log->l_mp)) {
-		xfs_buf_relse(bp);
-		return -EIO;
-	}
-
-	xfs_buf_iorequest(bp);
-	error = xfs_buf_iowait(bp);
+	error = xfs_buf_submit_wait(bp);
 	if (error) {
-		xfs_buf_ioerror_alert(bp, __func__);
-		ASSERT(0);
+		if (!XFS_FORCED_SHUTDOWN(log->l_mp)) {
+			xfs_buf_ioerror_alert(bp, __func__);
+			ASSERT(0);
+		}
 		xfs_buf_relse(bp);
 		return error;
 	}
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index d36bdbc9eeb2..51435dbce9c4 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -300,21 +300,15 @@ xfs_readsb(
 	 * access to the superblock.
 	 */
 reread:
-	bp = xfs_buf_read_uncached(mp->m_ddev_targp, XFS_SB_DADDR,
-				   BTOBB(sector_size), 0, buf_ops);
-	if (!bp) {
-		if (loud)
-			xfs_warn(mp, "SB buffer read failed");
-		return -EIO;
-	}
-	if (bp->b_error) {
-		error = bp->b_error;
+	error = xfs_buf_read_uncached(mp->m_ddev_targp, XFS_SB_DADDR,
+				   BTOBB(sector_size), 0, &bp, buf_ops);
+	if (error) {
 		if (loud)
 			xfs_warn(mp, "SB validate failed with error %d.", error);
 		/* bad CRC means corrupted metadata */
 		if (error == -EFSBADCRC)
 			error = -EFSCORRUPTED;
-		goto release_buf;
+		return error;
 	}
 
 	/*
@@ -544,40 +538,43 @@ xfs_set_inoalignment(xfs_mount_t *mp)
  * Check that the data (and log if separate) is an ok size.
  */
 STATIC int
-xfs_check_sizes(xfs_mount_t *mp)
+xfs_check_sizes(
+	struct xfs_mount *mp)
 {
-	xfs_buf_t	*bp;
+	struct xfs_buf	*bp;
 	xfs_daddr_t	d;
+	int		error;
 
 	d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
 	if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) {
 		xfs_warn(mp, "filesystem size mismatch detected");
 		return -EFBIG;
 	}
-	bp = xfs_buf_read_uncached(mp->m_ddev_targp,
+	error = xfs_buf_read_uncached(mp->m_ddev_targp,
 					d - XFS_FSS_TO_BB(mp, 1),
-					XFS_FSS_TO_BB(mp, 1), 0, NULL);
-	if (!bp) {
+					XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL);
+	if (error) {
 		xfs_warn(mp, "last sector read failed");
-		return -EIO;
+		return error;
 	}
 	xfs_buf_relse(bp);
 
-	if (mp->m_logdev_targp != mp->m_ddev_targp) {
-		d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
-		if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
-			xfs_warn(mp, "log size mismatch detected");
-			return -EFBIG;
-		}
-		bp = xfs_buf_read_uncached(mp->m_logdev_targp,
+	if (mp->m_logdev_targp == mp->m_ddev_targp)
+		return 0;
+
+	d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
+	if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
+		xfs_warn(mp, "log size mismatch detected");
+		return -EFBIG;
+	}
+	error = xfs_buf_read_uncached(mp->m_logdev_targp,
 					d - XFS_FSB_TO_BB(mp, 1),
-					XFS_FSB_TO_BB(mp, 1), 0, NULL);
-		if (!bp) {
-			xfs_warn(mp, "log device read failed");
-			return -EIO;
-		}
-		xfs_buf_relse(bp);
+					XFS_FSB_TO_BB(mp, 1), 0, &bp, NULL);
+	if (error) {
+		xfs_warn(mp, "log device read failed");
+		return error;
 	}
+	xfs_buf_relse(bp);
 	return 0;
 }
 
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index d45aebe04dde..e1175ea9b551 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -921,16 +921,11 @@ xfs_growfs_rt(
 	/*
 	 * Read in the last block of the device, make sure it exists.
 	 */
-	bp = xfs_buf_read_uncached(mp->m_rtdev_targp,
+	error = xfs_buf_read_uncached(mp->m_rtdev_targp,
 				XFS_FSB_TO_BB(mp, nrblocks - 1),
-				XFS_FSB_TO_BB(mp, 1), 0, NULL);
-	if (!bp)
-		return -EIO;
-	if (bp->b_error) {
-		error = bp->b_error;
-		xfs_buf_relse(bp);
+				XFS_FSB_TO_BB(mp, 1), 0, &bp, NULL);
+	if (error)
 		return error;
-	}
 	xfs_buf_relse(bp);
 
 	/*
@@ -1184,11 +1179,12 @@ xfs_rtallocate_extent(
  */
 int					/* error */
 xfs_rtmount_init(
-	xfs_mount_t	*mp)		/* file system mount structure */
+	struct xfs_mount *mp)		/* file system mount structure */
 {
-	xfs_buf_t	*bp;		/* buffer for last block of subvolume */
-	xfs_daddr_t	d;		/* address of last block of subvolume */
-	xfs_sb_t	*sbp;		/* filesystem superblock copy in mount */
+	struct xfs_buf	*bp;		/* buffer for last block of subvolume */
+	struct xfs_sb	*sbp;		/* filesystem superblock copy in mount */
+	xfs_daddr_t	d;		/* address of last block of subvolume */
+	int		error;
 
 	sbp = &mp->m_sb;
 	if (sbp->sb_rblocks == 0)
@@ -1214,14 +1210,12 @@ xfs_rtmount_init(
 			(unsigned long long) mp->m_sb.sb_rblocks);
 		return -EFBIG;
 	}
-	bp = xfs_buf_read_uncached(mp->m_rtdev_targp,
+	error = xfs_buf_read_uncached(mp->m_rtdev_targp,
 				d - XFS_FSB_TO_BB(mp, 1),
-				XFS_FSB_TO_BB(mp, 1), 0, NULL);
-	if (!bp || bp->b_error) {
+				XFS_FSB_TO_BB(mp, 1), 0, &bp, NULL);
+	if (error) {
 		xfs_warn(mp, "realtime device size check failed");
-		if (bp)
-			xfs_buf_relse(bp);
-		return -EIO;
+		return error;
 	}
 	xfs_buf_relse(bp);
 	return 0;
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 152f82782630..51372e34d988 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -349,7 +349,8 @@ DEFINE_BUF_EVENT(xfs_buf_free);
 DEFINE_BUF_EVENT(xfs_buf_hold);
 DEFINE_BUF_EVENT(xfs_buf_rele);
 DEFINE_BUF_EVENT(xfs_buf_iodone);
-DEFINE_BUF_EVENT(xfs_buf_iorequest);
+DEFINE_BUF_EVENT(xfs_buf_submit);
+DEFINE_BUF_EVENT(xfs_buf_submit_wait);
 DEFINE_BUF_EVENT(xfs_buf_bawrite);
 DEFINE_BUF_EVENT(xfs_buf_lock);
 DEFINE_BUF_EVENT(xfs_buf_lock_done);
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 96c898e7ac9a..e2b2216b1635 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -318,20 +318,10 @@ xfs_trans_read_buf_map(
 	XFS_BUF_READ(bp);
 	bp->b_ops = ops;
 
-	/*
-	 * XXX(hch): clean up the error handling here to be less
-	 * of a mess..
-	 */
-	if (XFS_FORCED_SHUTDOWN(mp)) {
-		trace_xfs_bdstrat_shut(bp, _RET_IP_);
-		xfs_bioerror_relse(bp);
-	} else {
-		xfs_buf_iorequest(bp);
-	}
-
-	error = xfs_buf_iowait(bp);
+	error = xfs_buf_submit_wait(bp);
 	if (error) {
-		xfs_buf_ioerror_alert(bp, __func__);
+		if (!XFS_FORCED_SHUTDOWN(mp))
+			xfs_buf_ioerror_alert(bp, __func__);
 		xfs_buf_relse(bp);
 		/*
 		 * We can gracefully recover from most read