author		Glenn Elliott <gelliott@cs.unc.edu>	2012-03-04 19:47:13 -0500
committer	Glenn Elliott <gelliott@cs.unc.edu>	2012-03-04 19:47:13 -0500
commit		c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch)
tree		ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /fs/xfs/xfs_buf_item.c
parent		ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff)
parent		6a00f206debf8a5c8899055726ad127dbeeed098 (diff)

Merge branch 'mpi-master' into wip-k-fmlp

Conflicts:
	litmus/sched_cedf.c
Diffstat (limited to 'fs/xfs/xfs_buf_item.c')
-rw-r--r--	fs/xfs/xfs_buf_item.c	203
1 file changed, 81 insertions(+), 122 deletions(-)
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 1b09d7a280df..7b7e005e3dcc 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -130,10 +130,12 @@ xfs_buf_item_log_check(
 	orig = bip->bli_orig;
 	buffer = XFS_BUF_PTR(bp);
 	for (x = 0; x < XFS_BUF_COUNT(bp); x++) {
-		if (orig[x] != buffer[x] && !btst(bip->bli_logged, x))
-			cmn_err(CE_PANIC,
-	"xfs_buf_item_log_check bip %x buffer %x orig %x index %d",
-				bip, bp, orig, x);
+		if (orig[x] != buffer[x] && !btst(bip->bli_logged, x)) {
+			xfs_emerg(bp->b_mount,
+				"%s: bip %x buffer %x orig %x index %d",
+				__func__, bip, bp, orig, x);
+			ASSERT(0);
+		}
 	}
 }
 #else
@@ -141,8 +143,7 @@ xfs_buf_item_log_check(
 #define xfs_buf_item_log_check(x)
 #endif
 
-STATIC void	xfs_buf_error_relse(xfs_buf_t *bp);
-STATIC void	xfs_buf_do_callbacks(xfs_buf_t *bp, xfs_log_item_t *lip);
+STATIC void	xfs_buf_do_callbacks(struct xfs_buf *bp);
 
 /*
  * This returns the number of log iovecs needed to log the
@@ -428,13 +429,15 @@ xfs_buf_item_unpin(
 
 	if (remove) {
 		/*
-		 * We have to remove the log item from the transaction
-		 * as we are about to release our reference to the
-		 * buffer. If we don't, the unlock that occurs later
-		 * in xfs_trans_uncommit() will ry to reference the
+		 * If we are in a transaction context, we have to
+		 * remove the log item from the transaction as we are
+		 * about to release our reference to the buffer. If we
+		 * don't, the unlock that occurs later in
+		 * xfs_trans_uncommit() will try to reference the
 		 * buffer which we no longer have a hold on.
 		 */
-		xfs_trans_del_item(lip);
+		if (lip->li_desc)
+			xfs_trans_del_item(lip);
 
 		/*
 		 * Since the transaction no longer refers to the buffer,
@@ -450,7 +453,7 @@ xfs_buf_item_unpin(
 		 * xfs_trans_ail_delete() drops the AIL lock.
 		 */
 		if (bip->bli_flags & XFS_BLI_STALE_INODE) {
-			xfs_buf_do_callbacks(bp, (xfs_log_item_t *)bip);
+			xfs_buf_do_callbacks(bp);
 			XFS_BUF_SET_FSPRIVATE(bp, NULL);
 			XFS_BUF_CLR_IODONE_FUNC(bp);
 		} else {
@@ -692,8 +695,7 @@ xfs_buf_item_init(
 	 * the first. If we do already have one, there is
 	 * nothing to do here so return.
 	 */
-	if (bp->b_mount != mp)
-		bp->b_mount = mp;
+	ASSERT(bp->b_target->bt_mount == mp);
 	if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) {
 		lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
 		if (lip->li_type == XFS_LI_BUF) {
@@ -919,15 +921,26 @@ xfs_buf_attach_iodone(
 	XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks);
 }
 
+/*
+ * We can have many callbacks on a buffer. Running the callbacks individually
+ * can cause a lot of contention on the AIL lock, so we allow for a single
+ * callback to be able to scan the remaining lip->li_bio_list for other items
+ * of the same type and callback to be processed in the first call.
+ *
+ * As a result, the loop walking the callback list below will also modify the
+ * list. it removes the first item from the list and then runs the callback.
+ * The loop then restarts from the new head of the list. This allows the
+ * callback to scan and modify the list attached to the buffer and we don't
+ * have to care about maintaining a next item pointer.
+ */
 STATIC void
 xfs_buf_do_callbacks(
-	xfs_buf_t	*bp,
-	xfs_log_item_t	*lip)
+	struct xfs_buf		*bp)
 {
-	xfs_log_item_t	*nlip;
+	struct xfs_log_item	*lip;
 
-	while (lip != NULL) {
-		nlip = lip->li_bio_list;
+	while ((lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *)) != NULL) {
+		XFS_BUF_SET_FSPRIVATE(bp, lip->li_bio_list);
 		ASSERT(lip->li_cb != NULL);
 		/*
 		 * Clear the next pointer so we don't have any
@@ -937,7 +950,6 @@ xfs_buf_do_callbacks(
 		 */
 		lip->li_bio_list = NULL;
 		lip->li_cb(bp, lip);
-		lip = nlip;
 	}
 }
 
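The rewritten xfs_buf_do_callbacks() shown in the two hunks above drops the saved next pointer: each pass pops the current head of the buffer's li_bio_list, clears the item's own list pointer, runs the callback, and restarts from whatever head is left, so a callback may strip further items off the buffer without the loop ever holding a stale pointer. The following is only a minimal standalone sketch of that consume-from-head pattern; struct item, struct buffer, and do_callbacks are invented names for the illustration, not the kernel's types or API.

#include <stddef.h>
#include <stdio.h>

/* Illustrative stand-ins for the log-item/callback relationship. */
struct item {
	struct item	*next;			/* like lip->li_bio_list */
	void		(*cb)(struct item *);	/* like lip->li_cb */
};

struct buffer {
	struct item	*head;			/* like the buffer's private list */
};

static void do_callbacks(struct buffer *bp)
{
	struct item *it;

	/*
	 * Always restart from the current head: the callback that just ran
	 * may have removed more entries from the list, so we never carry a
	 * saved "next" pointer across a callback invocation.
	 */
	while ((it = bp->head) != NULL) {
		bp->head = it->next;	/* pop the head before running it */
		it->next = NULL;	/* callback must not walk past itself */
		it->cb(it);
	}
}

static void say_hi(struct item *it)
{
	printf("callback for %p\n", (void *)it);
}

int main(void)
{
	struct item b = { NULL, say_hi };
	struct item a = { &b, say_hi };
	struct buffer buf = { &a };

	do_callbacks(&buf);
	return 0;
}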
@@ -950,128 +962,75 @@ xfs_buf_do_callbacks(
 	 */
 void
 xfs_buf_iodone_callbacks(
-	xfs_buf_t	*bp)
+	struct xfs_buf		*bp)
 {
-	xfs_log_item_t	*lip;
-	static ulong	lasttime;
-	static xfs_buftarg_t *lasttarg;
-	xfs_mount_t	*mp;
+	struct xfs_log_item	*lip = bp->b_fspriv;
+	struct xfs_mount	*mp = lip->li_mountp;
+	static ulong		lasttime;
+	static xfs_buftarg_t	*lasttarg;
 
-	ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
-	lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
+	if (likely(!XFS_BUF_GETERROR(bp)))
+		goto do_callbacks;
 
-	if (XFS_BUF_GETERROR(bp) != 0) {
-		/*
-		 * If we've already decided to shutdown the filesystem
-		 * because of IO errors, there's no point in giving this
-		 * a retry.
-		 */
-		mp = lip->li_mountp;
-		if (XFS_FORCED_SHUTDOWN(mp)) {
-			ASSERT(XFS_BUF_TARGET(bp) == mp->m_ddev_targp);
-			XFS_BUF_SUPER_STALE(bp);
-			trace_xfs_buf_item_iodone(bp, _RET_IP_);
-			xfs_buf_do_callbacks(bp, lip);
-			XFS_BUF_SET_FSPRIVATE(bp, NULL);
-			XFS_BUF_CLR_IODONE_FUNC(bp);
-			xfs_biodone(bp);
-			return;
-		}
+	/*
+	 * If we've already decided to shutdown the filesystem because of
+	 * I/O errors, there's no point in giving this a retry.
+	 */
+	if (XFS_FORCED_SHUTDOWN(mp)) {
+		XFS_BUF_SUPER_STALE(bp);
+		trace_xfs_buf_item_iodone(bp, _RET_IP_);
+		goto do_callbacks;
+	}
 
-		if ((XFS_BUF_TARGET(bp) != lasttarg) ||
-		    (time_after(jiffies, (lasttime + 5*HZ)))) {
-			lasttime = jiffies;
-			cmn_err(CE_ALERT, "Device %s, XFS metadata write error"
-				" block 0x%llx in %s",
-				XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)),
-				(__uint64_t)XFS_BUF_ADDR(bp), mp->m_fsname);
-		}
-		lasttarg = XFS_BUF_TARGET(bp);
+	if (XFS_BUF_TARGET(bp) != lasttarg ||
+	    time_after(jiffies, (lasttime + 5*HZ))) {
+		lasttime = jiffies;
+		xfs_alert(mp, "Device %s: metadata write error block 0x%llx",
+			XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)),
+			(__uint64_t)XFS_BUF_ADDR(bp));
+	}
+	lasttarg = XFS_BUF_TARGET(bp);
 
-		if (XFS_BUF_ISASYNC(bp)) {
-			/*
-			 * If the write was asynchronous then noone will be
-			 * looking for the error. Clear the error state
-			 * and write the buffer out again delayed write.
-			 *
-			 * XXXsup This is OK, so long as we catch these
-			 * before we start the umount; we don't want these
-			 * DELWRI metadata bufs to be hanging around.
-			 */
-			XFS_BUF_ERROR(bp,0); /* errno of 0 unsets the flag */
-
-			if (!(XFS_BUF_ISSTALE(bp))) {
-				XFS_BUF_DELAYWRITE(bp);
-				XFS_BUF_DONE(bp);
-				XFS_BUF_SET_START(bp);
-			}
-			ASSERT(XFS_BUF_IODONE_FUNC(bp));
-			trace_xfs_buf_item_iodone_async(bp, _RET_IP_);
-			xfs_buf_relse(bp);
-		} else {
-			/*
-			 * If the write of the buffer was not asynchronous,
-			 * then we want to make sure to return the error
-			 * to the caller of bwrite(). Because of this we
-			 * cannot clear the B_ERROR state at this point.
-			 * Instead we install a callback function that
-			 * will be called when the buffer is released, and
-			 * that routine will clear the error state and
-			 * set the buffer to be written out again after
-			 * some delay.
-			 */
-			/* We actually overwrite the existing b-relse
-			   function at times, but we're gonna be shutting down
-			   anyway. */
-			XFS_BUF_SET_BRELSE_FUNC(bp,xfs_buf_error_relse);
+	/*
+	 * If the write was asynchronous then no one will be looking for the
+	 * error. Clear the error state and write the buffer out again.
+	 *
+	 * During sync or umount we'll write all pending buffers again
+	 * synchronous, which will catch these errors if they keep hanging
+	 * around.
+	 */
+	if (XFS_BUF_ISASYNC(bp)) {
+		XFS_BUF_ERROR(bp, 0);	/* errno of 0 unsets the flag */
+
+		if (!XFS_BUF_ISSTALE(bp)) {
+			XFS_BUF_DELAYWRITE(bp);
 			XFS_BUF_DONE(bp);
-			XFS_BUF_FINISH_IOWAIT(bp);
+			XFS_BUF_SET_START(bp);
 		}
+		ASSERT(XFS_BUF_IODONE_FUNC(bp));
+		trace_xfs_buf_item_iodone_async(bp, _RET_IP_);
+		xfs_buf_relse(bp);
 		return;
 	}
 
-	xfs_buf_do_callbacks(bp, lip);
-	XFS_BUF_SET_FSPRIVATE(bp, NULL);
-	XFS_BUF_CLR_IODONE_FUNC(bp);
-	xfs_biodone(bp);
-}
-
-/*
- * This is a callback routine attached to a buffer which gets an error
- * when being written out synchronously.
- */
-STATIC void
-xfs_buf_error_relse(
-	xfs_buf_t	*bp)
-{
-	xfs_log_item_t	*lip;
-	xfs_mount_t	*mp;
-
-	lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
-	mp = (xfs_mount_t *)lip->li_mountp;
-	ASSERT(XFS_BUF_TARGET(bp) == mp->m_ddev_targp);
-
+	/*
+	 * If the write of the buffer was synchronous, we want to make
+	 * sure to return the error to the caller of xfs_bwrite().
+	 */
 	XFS_BUF_STALE(bp);
 	XFS_BUF_DONE(bp);
 	XFS_BUF_UNDELAYWRITE(bp);
-	XFS_BUF_ERROR(bp,0);
 
 	trace_xfs_buf_error_relse(bp, _RET_IP_);
+	xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
 
-	if (! XFS_FORCED_SHUTDOWN(mp))
-		xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
-	/*
-	 * We have to unpin the pinned buffers so do the
-	 * callbacks.
-	 */
-	xfs_buf_do_callbacks(bp, lip);
+do_callbacks:
+	xfs_buf_do_callbacks(bp);
 	XFS_BUF_SET_FSPRIVATE(bp, NULL);
 	XFS_BUF_CLR_IODONE_FUNC(bp);
-	XFS_BUF_SET_BRELSE_FUNC(bp,NULL);
-	xfs_buf_relse(bp);
+	xfs_buf_ioend(bp, 0);
 }
 
-
 /*
  * This is the iodone() function for buffers which have been
  * logged. It is called when they are eventually flushed out.
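The rewritten xfs_buf_iodone_callbacks() in the hunk above folds the old xfs_buf_error_relse() handler into one routine: the no-error and forced-shutdown cases jump to a shared do_callbacks label, an asynchronous write error is cleared and the buffer requeued without running callbacks, and a synchronous write error stales the buffer and forces a shutdown before falling through to the callbacks. Below is a rough, self-contained sketch of that control-flow shape only; struct buf, run_callbacks, retry_async_write and the other helpers are made-up stand-ins for the XFS buffer-flag macros, not real interfaces, and the shutdown branch is simplified.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins for the buffer state the real routine inspects. */
struct buf {
	int	error;		/* like XFS_BUF_GETERROR() */
	bool	async;		/* like XFS_BUF_ISASYNC() */
	bool	shut_down;	/* like XFS_FORCED_SHUTDOWN() on the mount */
};

static void run_callbacks(struct buf *bp)      { (void)bp; printf("callbacks\n"); }
static void io_end(struct buf *bp)             { (void)bp; printf("ioend\n"); }
static void retry_async_write(struct buf *bp)  { bp->error = 0; printf("requeued\n"); }
static void stale_and_shutdown(struct buf *bp) { (void)bp; printf("stale + shutdown\n"); }

static void iodone_callbacks(struct buf *bp)
{
	if (!bp->error)
		goto do_callbacks;	/* common case: no error at all */

	if (bp->shut_down)
		goto do_callbacks;	/* already shutting down: no retry */

	if (bp->async) {
		retry_async_write(bp);	/* async: nobody is waiting, rewrite later */
		return;			/* no callbacks yet, the retry may succeed */
	}

	stale_and_shutdown(bp);		/* sync: error goes back to the write caller */

do_callbacks:
	run_callbacks(bp);
	io_end(bp);
}

int main(void)
{
	struct buf ok = { 0, false, false };
	struct buf async_err = { 5, true, false };

	iodone_callbacks(&ok);
	iodone_callbacks(&async_err);
	return 0;
}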