summaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
authorGuilherme G. Piccoli <gpiccoli@canonical.com>2019-09-03 15:49:00 -0400
committerSong Liu <songliubraving@fb.com>2019-09-03 17:49:28 -0400
commit62f7b1989c02feed9274131b2fd5e990de4aba6f (patch)
treed6bf6a9b8e10bc8c6aa2e1cd27dd82b463bd0f18 /drivers/md
parenta22a9602b88fabf10847f238ff81fde5f906fef7 (diff)
md raid0/linear: Mark array as 'broken' and fail BIOs if a member is gone
Currently md raid0/linear are not provided with any mechanism to validate if an array member got removed or failed. The driver keeps sending BIOs regardless of the state of array members, and the kernel shows state 'clean' in the 'array_state' sysfs attribute. This leads to the following situation: if a raid0/linear array member is removed and the array is mounted, a user writing to this array won't realize that errors are happening unless they check dmesg or perform one fsync per written file. Despite udev signaling the member device is gone, 'mdadm' cannot issue the STOP_ARRAY ioctl successfully, given the array is mounted. In other words, no -EIO is returned and writes (except direct ones) appear normal. This means the user might think the written data is correctly stored in the array, but instead garbage was written, given that raid0 does striping (and so, it requires all its members to be working in order to not corrupt data). For md/linear, writes to the available members will work fine, but if the writes go to the missing member(s), it'll cause a file corruption situation, in which the portion of the writes destined for the missing devices isn't effectively written. This patch changes this behavior: we check if the block device's gendisk is UP when submitting the BIO to the array member, and if it isn't, we flag the md device as MD_BROKEN and fail subsequent I/Os to that device; a read request to the array requiring data from a valid member is still completed. While flagging the device as MD_BROKEN, we also show a rate-limited warning in the kernel log. A new array state 'broken' was added too: it mimics the state 'clean' in every aspect, being useful only to distinguish if the array has some member missing. We rely on the MD_BROKEN flag to put the array in the 'broken' state. This state cannot be written in 'array_state': since it just shows that one or more members of the array are missing but otherwise acts like 'clean', it wouldn't make sense to write it.
With this patch, the filesystem reacts much faster to the event of a missing array member: after some I/O errors, ext4 for instance aborts the journal and prevents corruption. Without this change, we're able to keep writing to the disk and after a machine reboot, e2fsck shows some severe fs errors that demand fixing. This patch was tested in ext4 and xfs filesystems, and requires a 'mdadm' counterpart to handle the 'broken' state. Cc: Song Liu <songliubraving@fb.com> Reviewed-by: NeilBrown <neilb@suse.de> Signed-off-by: Guilherme G. Piccoli <gpiccoli@canonical.com> Signed-off-by: Song Liu <songliubraving@fb.com>
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/md-linear.c5
-rw-r--r--drivers/md/md.c22
-rw-r--r--drivers/md/md.h16
-rw-r--r--drivers/md/raid0.c6
4 files changed, 45 insertions, 4 deletions
diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c
index 7354466ddc90..c766c559d36d 100644
--- a/drivers/md/md-linear.c
+++ b/drivers/md/md-linear.c
@@ -258,6 +258,11 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
258 bio_sector < start_sector)) 258 bio_sector < start_sector))
259 goto out_of_bounds; 259 goto out_of_bounds;
260 260
261 if (unlikely(is_mddev_broken(tmp_dev->rdev, "linear"))) {
262 bio_io_error(bio);
263 return true;
264 }
265
261 if (unlikely(bio_end_sector(bio) > end_sector)) { 266 if (unlikely(bio_end_sector(bio) > end_sector)) {
262 /* This bio crosses a device boundary, so we have to split it */ 267 /* This bio crosses a device boundary, so we have to split it */
263 struct bio *split = bio_split(bio, end_sector - bio_sector, 268 struct bio *split = bio_split(bio, end_sector - bio_sector,
diff --git a/drivers/md/md.c b/drivers/md/md.c
index b46bb143e3c5..73d5a1b04022 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -376,6 +376,11 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
376 struct mddev *mddev = q->queuedata; 376 struct mddev *mddev = q->queuedata;
377 unsigned int sectors; 377 unsigned int sectors;
378 378
379 if (unlikely(test_bit(MD_BROKEN, &mddev->flags)) && (rw == WRITE)) {
380 bio_io_error(bio);
381 return BLK_QC_T_NONE;
382 }
383
379 blk_queue_split(q, &bio); 384 blk_queue_split(q, &bio);
380 385
381 if (mddev == NULL || mddev->pers == NULL) { 386 if (mddev == NULL || mddev->pers == NULL) {
@@ -4158,12 +4163,17 @@ __ATTR_PREALLOC(resync_start, S_IRUGO|S_IWUSR,
4158 * active-idle 4163 * active-idle
4159 * like active, but no writes have been seen for a while (100msec). 4164 * like active, but no writes have been seen for a while (100msec).
4160 * 4165 *
4166 * broken
4167 * RAID0/LINEAR-only: same as clean, but array is missing a member.
4168 * It's useful because RAID0/LINEAR mounted-arrays aren't stopped
4169 * when a member is gone, so this state will at least alert the
4170 * user that something is wrong.
4161 */ 4171 */
4162enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active, 4172enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
4163 write_pending, active_idle, bad_word}; 4173 write_pending, active_idle, broken, bad_word};
4164static char *array_states[] = { 4174static char *array_states[] = {
4165 "clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active", 4175 "clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active",
4166 "write-pending", "active-idle", NULL }; 4176 "write-pending", "active-idle", "broken", NULL };
4167 4177
4168static int match_word(const char *word, char **list) 4178static int match_word(const char *word, char **list)
4169{ 4179{
@@ -4179,7 +4189,7 @@ array_state_show(struct mddev *mddev, char *page)
4179{ 4189{
4180 enum array_state st = inactive; 4190 enum array_state st = inactive;
4181 4191
4182 if (mddev->pers && !test_bit(MD_NOT_READY, &mddev->flags)) 4192 if (mddev->pers && !test_bit(MD_NOT_READY, &mddev->flags)) {
4183 switch(mddev->ro) { 4193 switch(mddev->ro) {
4184 case 1: 4194 case 1:
4185 st = readonly; 4195 st = readonly;
@@ -4199,7 +4209,10 @@ array_state_show(struct mddev *mddev, char *page)
4199 st = active; 4209 st = active;
4200 spin_unlock(&mddev->lock); 4210 spin_unlock(&mddev->lock);
4201 } 4211 }
4202 else { 4212
4213 if (test_bit(MD_BROKEN, &mddev->flags) && st == clean)
4214 st = broken;
4215 } else {
4203 if (list_empty(&mddev->disks) && 4216 if (list_empty(&mddev->disks) &&
4204 mddev->raid_disks == 0 && 4217 mddev->raid_disks == 0 &&
4205 mddev->dev_sectors == 0) 4218 mddev->dev_sectors == 0)
@@ -4313,6 +4326,7 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
4313 break; 4326 break;
4314 case write_pending: 4327 case write_pending:
4315 case active_idle: 4328 case active_idle:
4329 case broken:
4316 /* these cannot be set */ 4330 /* these cannot be set */
4317 break; 4331 break;
4318 } 4332 }
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 1edcd967eb8e..c5e3ff398b59 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -251,6 +251,9 @@ enum mddev_flags {
251 MD_NOT_READY, /* do_md_run() is active, so 'array_state' 251 MD_NOT_READY, /* do_md_run() is active, so 'array_state'
252 * must not report that array is ready yet 252 * must not report that array is ready yet
253 */ 253 */
254 MD_BROKEN, /* This is used in RAID-0/LINEAR only, to stop
255 * I/O in case an array member is gone/failed.
256 */
254}; 257};
255 258
256enum mddev_sb_flags { 259enum mddev_sb_flags {
@@ -739,6 +742,19 @@ extern void mddev_create_wb_pool(struct mddev *mddev, struct md_rdev *rdev,
739struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr); 742struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr);
740struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev); 743struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev);
741 744
745static inline bool is_mddev_broken(struct md_rdev *rdev, const char *md_type)
746{
747 int flags = rdev->bdev->bd_disk->flags;
748
749 if (!(flags & GENHD_FL_UP)) {
750 if (!test_and_set_bit(MD_BROKEN, &rdev->mddev->flags))
751 pr_warn("md: %s: %s array has a missing/failed member\n",
752 mdname(rdev->mddev), md_type);
753 return true;
754 }
755 return false;
756}
757
742static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev) 758static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev)
743{ 759{
744 int faulty = test_bit(Faulty, &rdev->flags); 760 int faulty = test_bit(Faulty, &rdev->flags);
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index bf5cf184a260..bc422eae2c95 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -586,6 +586,12 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio)
586 586
587 zone = find_zone(mddev->private, &sector); 587 zone = find_zone(mddev->private, &sector);
588 tmp_dev = map_sector(mddev, zone, sector, &sector); 588 tmp_dev = map_sector(mddev, zone, sector, &sector);
589
590 if (unlikely(is_mddev_broken(tmp_dev, "raid0"))) {
591 bio_io_error(bio);
592 return true;
593 }
594
589 bio_set_dev(bio, tmp_dev->bdev); 595 bio_set_dev(bio, tmp_dev->bdev);
590 bio->bi_iter.bi_sector = sector + zone->dev_start + 596 bio->bi_iter.bi_sector = sector + zone->dev_start +
591 tmp_dev->data_offset; 597 tmp_dev->data_offset;