author     Glenn Elliott <gelliott@cs.unc.edu>   2012-03-04 19:47:13 -0500
committer  Glenn Elliott <gelliott@cs.unc.edu>   2012-03-04 19:47:13 -0500
commit     c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch)
tree       ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /fs/xfs/xfs_buf_item.c
parent     ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff)
parent     6a00f206debf8a5c8899055726ad127dbeeed098 (diff)
Merge branch 'mpi-master' into wip-k-fmlp
Conflicts:
litmus/sched_cedf.c
Diffstat (limited to 'fs/xfs/xfs_buf_item.c')
-rw-r--r--   fs/xfs/xfs_buf_item.c   203
1 file changed, 81 insertions(+), 122 deletions(-)
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 1b09d7a280df..7b7e005e3dcc 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -130,10 +130,12 @@ xfs_buf_item_log_check(
130 | orig = bip->bli_orig; | 130 | orig = bip->bli_orig; |
131 | buffer = XFS_BUF_PTR(bp); | 131 | buffer = XFS_BUF_PTR(bp); |
132 | for (x = 0; x < XFS_BUF_COUNT(bp); x++) { | 132 | for (x = 0; x < XFS_BUF_COUNT(bp); x++) { |
133 | if (orig[x] != buffer[x] && !btst(bip->bli_logged, x)) | 133 | if (orig[x] != buffer[x] && !btst(bip->bli_logged, x)) { |
134 | cmn_err(CE_PANIC, | 134 | xfs_emerg(bp->b_mount, |
135 | "xfs_buf_item_log_check bip %x buffer %x orig %x index %d", | 135 | "%s: bip %x buffer %x orig %x index %d", |
136 | bip, bp, orig, x); | 136 | __func__, bip, bp, orig, x); |
137 | ASSERT(0); | ||
138 | } | ||
137 | } | 139 | } |
138 | } | 140 | } |
139 | #else | 141 | #else |
@@ -141,8 +143,7 @@ xfs_buf_item_log_check(
141 | #define xfs_buf_item_log_check(x) | 143 | #define xfs_buf_item_log_check(x) |
142 | #endif | 144 | #endif |
143 | 145 | ||
144 | STATIC void xfs_buf_error_relse(xfs_buf_t *bp); | 146 | STATIC void xfs_buf_do_callbacks(struct xfs_buf *bp); |
145 | STATIC void xfs_buf_do_callbacks(xfs_buf_t *bp, xfs_log_item_t *lip); | ||
146 | 147 | ||
147 | /* | 148 | /* |
148 | * This returns the number of log iovecs needed to log the | 149 | * This returns the number of log iovecs needed to log the |
@@ -428,13 +429,15 @@ xfs_buf_item_unpin(
428 | 429 | ||
429 | if (remove) { | 430 | if (remove) { |
430 | /* | 431 | /* |
431 | * We have to remove the log item from the transaction | 432 | * If we are in a transaction context, we have to |
432 | * as we are about to release our reference to the | 433 | * remove the log item from the transaction as we are |
433 | * buffer. If we don't, the unlock that occurs later | 434 | * about to release our reference to the buffer. If we |
434 | * in xfs_trans_uncommit() will ry to reference the | 435 | * don't, the unlock that occurs later in |
436 | * xfs_trans_uncommit() will try to reference the | ||
435 | * buffer which we no longer have a hold on. | 437 | * buffer which we no longer have a hold on. |
436 | */ | 438 | */ |
437 | xfs_trans_del_item(lip); | 439 | if (lip->li_desc) |
440 | xfs_trans_del_item(lip); | ||
438 | 441 | ||
439 | /* | 442 | /* |
440 | * Since the transaction no longer refers to the buffer, | 443 | * Since the transaction no longer refers to the buffer, |
@@ -450,7 +453,7 @@ xfs_buf_item_unpin(
450 | * xfs_trans_ail_delete() drops the AIL lock. | 453 | * xfs_trans_ail_delete() drops the AIL lock. |
451 | */ | 454 | */ |
452 | if (bip->bli_flags & XFS_BLI_STALE_INODE) { | 455 | if (bip->bli_flags & XFS_BLI_STALE_INODE) { |
453 | xfs_buf_do_callbacks(bp, (xfs_log_item_t *)bip); | 456 | xfs_buf_do_callbacks(bp); |
454 | XFS_BUF_SET_FSPRIVATE(bp, NULL); | 457 | XFS_BUF_SET_FSPRIVATE(bp, NULL); |
455 | XFS_BUF_CLR_IODONE_FUNC(bp); | 458 | XFS_BUF_CLR_IODONE_FUNC(bp); |
456 | } else { | 459 | } else { |
@@ -692,8 +695,7 @@ xfs_buf_item_init(
692 | * the first. If we do already have one, there is | 695 | * the first. If we do already have one, there is |
693 | * nothing to do here so return. | 696 | * nothing to do here so return. |
694 | */ | 697 | */ |
695 | if (bp->b_mount != mp) | 698 | ASSERT(bp->b_target->bt_mount == mp); |
696 | bp->b_mount = mp; | ||
697 | if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) { | 699 | if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) { |
698 | lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); | 700 | lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); |
699 | if (lip->li_type == XFS_LI_BUF) { | 701 | if (lip->li_type == XFS_LI_BUF) { |
@@ -919,15 +921,26 @@ xfs_buf_attach_iodone(
919 | XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks); | 921 | XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks); |
920 | } | 922 | } |
921 | 923 | ||
924 | /* | ||
925 | * We can have many callbacks on a buffer. Running the callbacks individually | ||
926 | * can cause a lot of contention on the AIL lock, so we allow for a single | ||
927 | * callback to be able to scan the remaining lip->li_bio_list for other items | ||
928 | * of the same type and callback to be processed in the first call. | ||
929 | * | ||
930 | * As a result, the loop walking the callback list below will also modify the | ||
931 | * list. it removes the first item from the list and then runs the callback. | ||
932 | * The loop then restarts from the new head of the list. This allows the | ||
933 | * callback to scan and modify the list attached to the buffer and we don't | ||
934 | * have to care about maintaining a next item pointer. | ||
935 | */ | ||
922 | STATIC void | 936 | STATIC void |
923 | xfs_buf_do_callbacks( | 937 | xfs_buf_do_callbacks( |
924 | xfs_buf_t *bp, | 938 | struct xfs_buf *bp) |
925 | xfs_log_item_t *lip) | ||
926 | { | 939 | { |
927 | xfs_log_item_t *nlip; | 940 | struct xfs_log_item *lip; |
928 | 941 | ||
929 | while (lip != NULL) { | 942 | while ((lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *)) != NULL) { |
930 | nlip = lip->li_bio_list; | 943 | XFS_BUF_SET_FSPRIVATE(bp, lip->li_bio_list); |
931 | ASSERT(lip->li_cb != NULL); | 944 | ASSERT(lip->li_cb != NULL); |
932 | /* | 945 | /* |
933 | * Clear the next pointer so we don't have any | 946 | * Clear the next pointer so we don't have any |
@@ -937,7 +950,6 @@ xfs_buf_do_callbacks(
937 | */ | 950 | */ |
938 | lip->li_bio_list = NULL; | 951 | lip->li_bio_list = NULL; |
939 | lip->li_cb(bp, lip); | 952 | lip->li_cb(bp, lip); |
940 | lip = nlip; | ||
941 | } | 953 | } |
942 | } | 954 | } |
943 | 955 | ||
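
The new comment above explains why xfs_buf_do_callbacks() now re-reads the head of the item list on every iteration: a callback may itself strip further items of its own type off the buffer's list, so the loop simply restarts from whatever head remains. The following standalone sketch illustrates that pattern with hypothetical types and names (struct item, gang_cb, do_callbacks), not the XFS log-item structures:

    /*
     * Minimal sketch of "pop the head, run its callback, restart from the
     * new head".  The dispatcher never keeps a next pointer of its own, so
     * a callback is free to unlink additional items from the list.
     */
    #include <stdio.h>

    struct item;
    typedef void (*item_cb_t)(struct item *);

    struct item {
            struct item     *next;
            int             type;
            item_cb_t       cb;
    };

    static struct item *head;       /* stand-in for the buffer's item list */

    /* Callback that also consumes every queued item of the same type. */
    static void gang_cb(struct item *ip)
    {
            struct item **pp = &head;

            printf("handling item of type %d\n", ip->type);
            while (*pp) {
                    if ((*pp)->type == ip->type) {
                            struct item *match = *pp;

                            *pp = match->next;      /* unlink from the list */
                            match->next = NULL;
                            printf("  also handled queued item of type %d\n",
                                   match->type);
                    } else {
                            pp = &(*pp)->next;
                    }
            }
    }

    static void do_callbacks(void)
    {
            struct item *ip;

            while ((ip = head) != NULL) {
                    head = ip->next;        /* advance before running the callback */
                    ip->next = NULL;        /* callback must not walk past itself */
                    ip->cb(ip);
            }
    }

    int main(void)
    {
            struct item c = { NULL, 2, gang_cb };
            struct item b = { &c,   1, gang_cb };
            struct item a = { &b,   1, gang_cb };

            head = &a;
            do_callbacks();         /* item 'b' is consumed by a's callback */
            return 0;
    }

Because the loop always detaches the current item and then re-reads the list head, a callback can reorganise or shrink the remaining list without the dispatcher ever dereferencing an item it no longer owns.
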
@@ -950,128 +962,75 @@ xfs_buf_do_callbacks(
950 | */ | 962 | */ |
951 | void | 963 | void |
952 | xfs_buf_iodone_callbacks( | 964 | xfs_buf_iodone_callbacks( |
953 | xfs_buf_t *bp) | 965 | struct xfs_buf *bp) |
954 | { | 966 | { |
955 | xfs_log_item_t *lip; | 967 | struct xfs_log_item *lip = bp->b_fspriv; |
956 | static ulong lasttime; | 968 | struct xfs_mount *mp = lip->li_mountp; |
957 | static xfs_buftarg_t *lasttarg; | 969 | static ulong lasttime; |
958 | xfs_mount_t *mp; | 970 | static xfs_buftarg_t *lasttarg; |
959 | 971 | ||
960 | ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); | 972 | if (likely(!XFS_BUF_GETERROR(bp))) |
961 | lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); | 973 | goto do_callbacks; |
962 | 974 | ||
963 | if (XFS_BUF_GETERROR(bp) != 0) { | 975 | /* |
964 | /* | 976 | * If we've already decided to shutdown the filesystem because of |
965 | * If we've already decided to shutdown the filesystem | 977 | * I/O errors, there's no point in giving this a retry. |
966 | * because of IO errors, there's no point in giving this | 978 | */ |
967 | * a retry. | 979 | if (XFS_FORCED_SHUTDOWN(mp)) { |
968 | */ | 980 | XFS_BUF_SUPER_STALE(bp); |
969 | mp = lip->li_mountp; | 981 | trace_xfs_buf_item_iodone(bp, _RET_IP_); |
970 | if (XFS_FORCED_SHUTDOWN(mp)) { | 982 | goto do_callbacks; |
971 | ASSERT(XFS_BUF_TARGET(bp) == mp->m_ddev_targp); | 983 | } |
972 | XFS_BUF_SUPER_STALE(bp); | ||
973 | trace_xfs_buf_item_iodone(bp, _RET_IP_); | ||
974 | xfs_buf_do_callbacks(bp, lip); | ||
975 | XFS_BUF_SET_FSPRIVATE(bp, NULL); | ||
976 | XFS_BUF_CLR_IODONE_FUNC(bp); | ||
977 | xfs_biodone(bp); | ||
978 | return; | ||
979 | } | ||
980 | 984 | ||
981 | if ((XFS_BUF_TARGET(bp) != lasttarg) || | 985 | if (XFS_BUF_TARGET(bp) != lasttarg || |
982 | (time_after(jiffies, (lasttime + 5*HZ)))) { | 986 | time_after(jiffies, (lasttime + 5*HZ))) { |
983 | lasttime = jiffies; | 987 | lasttime = jiffies; |
984 | cmn_err(CE_ALERT, "Device %s, XFS metadata write error" | 988 | xfs_alert(mp, "Device %s: metadata write error block 0x%llx", |
985 | " block 0x%llx in %s", | 989 | XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)), |
986 | XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)), | 990 | (__uint64_t)XFS_BUF_ADDR(bp)); |
987 | (__uint64_t)XFS_BUF_ADDR(bp), mp->m_fsname); | 991 | } |
988 | } | 992 | lasttarg = XFS_BUF_TARGET(bp); |
989 | lasttarg = XFS_BUF_TARGET(bp); | ||
990 | 993 | ||
991 | if (XFS_BUF_ISASYNC(bp)) { | 994 | /* |
992 | /* | 995 | * If the write was asynchronous then no one will be looking for the |
993 | * If the write was asynchronous then noone will be | 996 | * error. Clear the error state and write the buffer out again. |
994 | * looking for the error. Clear the error state | 997 | * |
995 | * and write the buffer out again delayed write. | 998 | * During sync or umount we'll write all pending buffers again |
996 | * | 999 | * synchronous, which will catch these errors if they keep hanging |
997 | * XXXsup This is OK, so long as we catch these | 1000 | * around. |
998 | * before we start the umount; we don't want these | 1001 | */ |
999 | * DELWRI metadata bufs to be hanging around. | 1002 | if (XFS_BUF_ISASYNC(bp)) { |
1000 | */ | 1003 | XFS_BUF_ERROR(bp, 0); /* errno of 0 unsets the flag */ |
1001 | XFS_BUF_ERROR(bp,0); /* errno of 0 unsets the flag */ | 1004 | |
1002 | 1005 | if (!XFS_BUF_ISSTALE(bp)) { | |
1003 | if (!(XFS_BUF_ISSTALE(bp))) { | 1006 | XFS_BUF_DELAYWRITE(bp); |
1004 | XFS_BUF_DELAYWRITE(bp); | ||
1005 | XFS_BUF_DONE(bp); | ||
1006 | XFS_BUF_SET_START(bp); | ||
1007 | } | ||
1008 | ASSERT(XFS_BUF_IODONE_FUNC(bp)); | ||
1009 | trace_xfs_buf_item_iodone_async(bp, _RET_IP_); | ||
1010 | xfs_buf_relse(bp); | ||
1011 | } else { | ||
1012 | /* | ||
1013 | * If the write of the buffer was not asynchronous, | ||
1014 | * then we want to make sure to return the error | ||
1015 | * to the caller of bwrite(). Because of this we | ||
1016 | * cannot clear the B_ERROR state at this point. | ||
1017 | * Instead we install a callback function that | ||
1018 | * will be called when the buffer is released, and | ||
1019 | * that routine will clear the error state and | ||
1020 | * set the buffer to be written out again after | ||
1021 | * some delay. | ||
1022 | */ | ||
1023 | /* We actually overwrite the existing b-relse | ||
1024 | function at times, but we're gonna be shutting down | ||
1025 | anyway. */ | ||
1026 | XFS_BUF_SET_BRELSE_FUNC(bp,xfs_buf_error_relse); | ||
1027 | XFS_BUF_DONE(bp); | 1007 | XFS_BUF_DONE(bp); |
1028 | XFS_BUF_FINISH_IOWAIT(bp); | 1008 | XFS_BUF_SET_START(bp); |
1029 | } | 1009 | } |
1010 | ASSERT(XFS_BUF_IODONE_FUNC(bp)); | ||
1011 | trace_xfs_buf_item_iodone_async(bp, _RET_IP_); | ||
1012 | xfs_buf_relse(bp); | ||
1030 | return; | 1013 | return; |
1031 | } | 1014 | } |
1032 | 1015 | ||
1033 | xfs_buf_do_callbacks(bp, lip); | 1016 | /* |
1034 | XFS_BUF_SET_FSPRIVATE(bp, NULL); | 1017 | * If the write of the buffer was synchronous, we want to make |
1035 | XFS_BUF_CLR_IODONE_FUNC(bp); | 1018 | * sure to return the error to the caller of xfs_bwrite(). |
1036 | xfs_biodone(bp); | 1019 | */ |
1037 | } | ||
1038 | |||
1039 | /* | ||
1040 | * This is a callback routine attached to a buffer which gets an error | ||
1041 | * when being written out synchronously. | ||
1042 | */ | ||
1043 | STATIC void | ||
1044 | xfs_buf_error_relse( | ||
1045 | xfs_buf_t *bp) | ||
1046 | { | ||
1047 | xfs_log_item_t *lip; | ||
1048 | xfs_mount_t *mp; | ||
1049 | |||
1050 | lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); | ||
1051 | mp = (xfs_mount_t *)lip->li_mountp; | ||
1052 | ASSERT(XFS_BUF_TARGET(bp) == mp->m_ddev_targp); | ||
1053 | |||
1054 | XFS_BUF_STALE(bp); | 1020 | XFS_BUF_STALE(bp); |
1055 | XFS_BUF_DONE(bp); | 1021 | XFS_BUF_DONE(bp); |
1056 | XFS_BUF_UNDELAYWRITE(bp); | 1022 | XFS_BUF_UNDELAYWRITE(bp); |
1057 | XFS_BUF_ERROR(bp,0); | ||
1058 | 1023 | ||
1059 | trace_xfs_buf_error_relse(bp, _RET_IP_); | 1024 | trace_xfs_buf_error_relse(bp, _RET_IP_); |
1025 | xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); | ||
1060 | 1026 | ||
1061 | if (! XFS_FORCED_SHUTDOWN(mp)) | 1027 | do_callbacks: |
1062 | xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); | 1028 | xfs_buf_do_callbacks(bp); |
1063 | /* | ||
1064 | * We have to unpin the pinned buffers so do the | ||
1065 | * callbacks. | ||
1066 | */ | ||
1067 | xfs_buf_do_callbacks(bp, lip); | ||
1068 | XFS_BUF_SET_FSPRIVATE(bp, NULL); | 1029 | XFS_BUF_SET_FSPRIVATE(bp, NULL); |
1069 | XFS_BUF_CLR_IODONE_FUNC(bp); | 1030 | XFS_BUF_CLR_IODONE_FUNC(bp); |
1070 | XFS_BUF_SET_BRELSE_FUNC(bp,NULL); | 1031 | xfs_buf_ioend(bp, 0); |
1071 | xfs_buf_relse(bp); | ||
1072 | } | 1032 | } |
1073 | 1033 | ||
1074 | |||
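
The rewritten xfs_buf_iodone_callbacks() above folds the old synchronous-error release hook (xfs_buf_error_relse) into one error-handling path in front of a shared do_callbacks label: a clean I/O or an already shut-down filesystem runs the callbacks straight away, an asynchronous write error is cleared and the buffer re-queued for another delayed-write attempt, and only a synchronous write error stales the buffer and forces a shutdown before the callbacks run. The sketch below outlines that decision order using simplified flags and stub helpers (struct buf, requeue_delwri, shutdown_fs); it is not the real XFS buffer API:

    #include <stdbool.h>
    #include <stdio.h>

    struct buf {
            int     error;          /* errno from the completed I/O */
            bool    async;          /* write was asynchronous */
            bool    stale;          /* buffer contents invalidated */
            bool    delwri;         /* queued for a delayed-write retry */
            bool    fs_shut_down;   /* filesystem already shut down */
    };

    static void run_callbacks(struct buf *bp)  { (void)bp; puts("run item callbacks, end I/O"); }
    static void requeue_delwri(struct buf *bp) { bp->delwri = true; puts("retry via delayed write"); }
    static void shutdown_fs(struct buf *bp)    { bp->fs_shut_down = true; puts("force shutdown"); }

    static void iodone_callbacks(struct buf *bp)
    {
            if (!bp->error)                 /* common case: no error at all */
                    goto do_callbacks;

            if (bp->fs_shut_down) {         /* no point retrying */
                    bp->stale = true;
                    goto do_callbacks;
            }

            if (bp->async) {                /* nobody is waiting for the error */
                    bp->error = 0;
                    if (!bp->stale)
                            requeue_delwri(bp);
                    puts("release buffer, callbacks stay attached");
                    return;
            }

            /* synchronous write error: leave it visible and give up on the fs */
            bp->stale = true;
            bp->delwri = false;
            shutdown_fs(bp);

    do_callbacks:
            run_callbacks(bp);
    }

    int main(void)
    {
            struct buf ok    = { 0 };
            struct buf async = { .error = 5, .async = true };
            struct buf sync  = { .error = 5, .async = false };

            iodone_callbacks(&ok);
            iodone_callbacks(&async);
            iodone_callbacks(&sync);
            return 0;
    }

Keeping the error set on the buffer in the synchronous branch is what lets the waiting caller of xfs_bwrite() still see the failure, while the asynchronous branch clears it precisely because no caller is left to look at it.
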
1075 | /* | 1034 | /* |
1076 | * This is the iodone() function for buffers which have been | 1035 | * This is the iodone() function for buffers which have been |
1077 | * logged. It is called when they are eventually flushed out. | 1036 | * logged. It is called when they are eventually flushed out. |