aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/md.h
diff options
context:
space:
mode:
authorNeilBrown <neilb@suse.de>2011-07-27 21:31:48 -0400
committerNeilBrown <neilb@suse.de>2011-07-27 21:31:48 -0400
commitde393cdea66cbd63c90725663f400c76faf1b255 (patch)
tree6a2bf37bee98bf7de42856f904bd23c81e082f8e /drivers/md/md.h
parentd7a9d443bc8a75a24873c0506f50051edfedc714 (diff)
md: make it easier to wait for bad blocks to be acknowledged.
It is only safe to choose not to write to a bad block if that bad block is safely recorded in metadata - i.e. if it has been 'acknowledged'. If it hasn't we need to wait for the acknowledgement. We support that using rdev->blocked wait and md_wait_for_blocked_rdev by introducing a new device flag 'BlockedBadBlock'. This flag is only advisory. It is cleared whenever we acknowledge a bad block, so that a waiter can re-check the particular bad blocks that it is interested it. It should be set by a caller when they find they need to wait. This (set after test) is inherently racy, but as md_wait_for_blocked_rdev already has a timeout, losing the race will have minimal impact. When we clear "Blocked" was also clear "BlockedBadBlocks" incase it was set incorrectly (see above race). We also modify the way we manage 'Blocked' to fit better with the new handling of 'BlockedBadBlocks' and to make it consistent between externally managed and internally managed metadata. This requires that each raidXd loop checks if the metadata needs to be written and triggers a write (md_check_recovery) if needed. Otherwise a queued write request might cause raidXd to wait for the metadata to write, and only that thread can write it. Before writing metadata, we set FaultRecorded for all devices that are Faulty, then after writing the metadata we clear Blocked for any device for which the Fault was certainly Recorded. The 'faulty' device flag now appears in sysfs if the device is faulty *or* it has unacknowledged bad blocks. So user-space which does not understand bad blocks can continue to function correctly. User space which does, should not assume a device is faulty until it sees the 'faulty' flag, and then sees the list of unacknowledged bad blocks is empty. Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers/md/md.h')
-rw-r--r--drivers/md/md.h25
1 files changed, 23 insertions, 2 deletions
diff --git a/drivers/md/md.h b/drivers/md/md.h
index fa4b607854ac..1e586bb4452e 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -81,12 +81,29 @@ struct mdk_rdev_s
81#define In_sync 2 /* device is in_sync with rest of array */ 81#define In_sync 2 /* device is in_sync with rest of array */
82#define WriteMostly 4 /* Avoid reading if at all possible */ 82#define WriteMostly 4 /* Avoid reading if at all possible */
83#define AutoDetected 7 /* added by auto-detect */ 83#define AutoDetected 7 /* added by auto-detect */
84#define Blocked 8 /* An error occurred on an externally 84#define Blocked 8 /* An error occurred but has not yet
85 * managed array, don't allow writes 85 * been acknowledged by the metadata
86 * handler, so don't allow writes
86 * until it is cleared */ 87 * until it is cleared */
87#define WriteErrorSeen 9 /* A write error has been seen on this 88#define WriteErrorSeen 9 /* A write error has been seen on this
88 * device 89 * device
89 */ 90 */
91#define FaultRecorded 10 /* Intermediate state for clearing
92 * Blocked. The Fault is/will-be
93 * recorded in the metadata, but that
94 * metadata hasn't been stored safely
95 * on disk yet.
96 */
97#define BlockedBadBlocks 11 /* A writer is blocked because they
98 * found an unacknowledged bad-block.
99 * This can safely be cleared at any
100 * time, and the writer will re-check.
101 * It may be set at any time, and at
102 * worst the writer will timeout and
103 * re-check. So setting it as
104 * accurately as possible is good, but
105 * not absolutely critical.
106 */
90 wait_queue_head_t blocked_wait; 107 wait_queue_head_t blocked_wait;
91 108
92 int desc_nr; /* descriptor index in the superblock */ 109 int desc_nr; /* descriptor index in the superblock */
@@ -124,6 +141,10 @@ struct mdk_rdev_s
124 141
125 struct badblocks { 142 struct badblocks {
126 int count; /* count of bad blocks */ 143 int count; /* count of bad blocks */
144 int unacked_exist; /* there probably are unacknowledged
145 * bad blocks. This is only cleared
146 * when a read discovers none
147 */
127 int shift; /* shift from sectors to block size 148 int shift; /* shift from sectors to block size
128 * a -ve shift means badblocks are 149 * a -ve shift means badblocks are
129 * disabled.*/ 150 * disabled.*/