author     Linus Torvalds <torvalds@linux-foundation.org>  2017-01-12 13:17:59 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2017-01-12 13:17:59 -0500
commit     607ae5f26920b8dfedbbf882c0f9edd3b9aa6cf7 (patch)
tree       2bd4990aaef9b2aeabae1c24d6ff388c5238142f
parent     ba836a6f5ab1243ff5e08a941a2d1de8b31244e1 (diff)
parent     32cd7cbbacf700885a2316275f188f2d5739b5f4 (diff)
Merge tag 'md/4.10-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md
Pull md fixes from Shaohua Li:
 "Basically one fix for the raid5 cache that was merged in this cycle;
  the others are trivial fixes."

* tag 'md/4.10-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md:
  md/raid5: Use correct IS_ERR() variation on pointer check
  md: cleanup mddev flag clear for takeover
  md/r5cache: fix spelling mistake on "recoverying"
  md/r5cache: assign conf->log before r5l_load_log()
  md/r5cache: simplify handling of sh->log_start in recovery
  md/raid5-cache: removes unnecessary write-through mode judgments
  md/raid10: Refactor raid10_make_request
  md/raid1: Refactor raid1_make_request
-rw-r--r--  drivers/md/md.h          |   8
-rw-r--r--  drivers/md/raid0.c       |  12
-rw-r--r--  drivers/md/raid1.c       | 275
-rw-r--r--  drivers/md/raid10.c      | 245
-rw-r--r--  drivers/md/raid5-cache.c |  36
-rw-r--r--  drivers/md/raid5.c       |   7
6 files changed, 322 insertions(+), 261 deletions(-)
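
Several hunks below implement the "cleanup mddev flag clear for takeover" change: each personality defines an UNSUPPORTED_MDDEV_FLAGS mask, and the new mddev_clear_unsupported_flags() helper masks all unsupported bits out at once instead of the previous per-flag clear_bit() calls. Below is a standalone sketch of that pattern; the helper body and the raid0 mask are taken from the diff, while the flag bit positions are illustrative placeholders, not the kernel's actual values.

#include <stdio.h>

/* Illustrative flag bit positions -- placeholders, not the kernel's values. */
enum mddev_flags {
	MD_ARRAY_FIRST_USE,
	MD_CLOSING,
	MD_HAS_JOURNAL,
	MD_JOURNAL_CLEAN,
	MD_FAILFAST_SUPPORTED,
};

/* Flags a raid0 array cannot carry after takeover (mask as in raid0.c below). */
#define UNSUPPORTED_MDDEV_FLAGS \
	((1L << MD_HAS_JOURNAL) | \
	 (1L << MD_JOURNAL_CLEAN) | \
	 (1L << MD_FAILFAST_SUPPORTED))

struct mddev {
	unsigned long flags;
};

/* clear unsupported mddev_flags (helper as added to md.h below) */
static inline void mddev_clear_unsupported_flags(struct mddev *mddev,
						 unsigned long unsupported_flags)
{
	mddev->flags &= ~unsupported_flags;
}

int main(void)
{
	struct mddev mddev = {
		.flags = (1L << MD_HAS_JOURNAL) | (1L << MD_CLOSING),
	};

	/* One call replaces the scattered clear_bit(MD_HAS_JOURNAL)/... pairs. */
	mddev_clear_unsupported_flags(&mddev, UNSUPPORTED_MDDEV_FLAGS);

	/* Only MD_CLOSING survives; the journal and failfast bits are gone. */
	printf("flags after takeover cleanup: %#lx\n", mddev.flags);
	return 0;
}
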
diff --git a/drivers/md/md.h b/drivers/md/md.h
index e38936d05df1..2a514036a83d 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -212,6 +212,7 @@ extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
212 int is_new); 212 int is_new);
213struct md_cluster_info; 213struct md_cluster_info;
214 214
215/* change UNSUPPORTED_MDDEV_FLAGS for each array type if new flag is added */
215enum mddev_flags { 216enum mddev_flags {
216 MD_ARRAY_FIRST_USE, /* First use of array, needs initialization */ 217 MD_ARRAY_FIRST_USE, /* First use of array, needs initialization */
217 MD_CLOSING, /* If set, we are closing the array, do not open 218 MD_CLOSING, /* If set, we are closing the array, do not open
@@ -702,4 +703,11 @@ static inline int mddev_is_clustered(struct mddev *mddev)
702{ 703{
703 return mddev->cluster_info && mddev->bitmap_info.nodes > 1; 704 return mddev->cluster_info && mddev->bitmap_info.nodes > 1;
704} 705}
706
707/* clear unsupported mddev_flags */
708static inline void mddev_clear_unsupported_flags(struct mddev *mddev,
709 unsigned long unsupported_flags)
710{
711 mddev->flags &= ~unsupported_flags;
712}
705#endif /* _MD_MD_H */ 713#endif /* _MD_MD_H */
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index a162fedeb51a..848365d474f3 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -26,6 +26,11 @@
26#include "raid0.h" 26#include "raid0.h"
27#include "raid5.h" 27#include "raid5.h"
28 28
29#define UNSUPPORTED_MDDEV_FLAGS \
30 ((1L << MD_HAS_JOURNAL) | \
31 (1L << MD_JOURNAL_CLEAN) | \
32 (1L << MD_FAILFAST_SUPPORTED))
33
29static int raid0_congested(struct mddev *mddev, int bits) 34static int raid0_congested(struct mddev *mddev, int bits)
30{ 35{
31 struct r0conf *conf = mddev->private; 36 struct r0conf *conf = mddev->private;
@@ -539,8 +544,7 @@ static void *raid0_takeover_raid45(struct mddev *mddev)
539 mddev->delta_disks = -1; 544 mddev->delta_disks = -1;
540 /* make sure it will be not marked as dirty */ 545 /* make sure it will be not marked as dirty */
541 mddev->recovery_cp = MaxSector; 546 mddev->recovery_cp = MaxSector;
542 clear_bit(MD_HAS_JOURNAL, &mddev->flags); 547 mddev_clear_unsupported_flags(mddev, UNSUPPORTED_MDDEV_FLAGS);
543 clear_bit(MD_JOURNAL_CLEAN, &mddev->flags);
544 548
545 create_strip_zones(mddev, &priv_conf); 549 create_strip_zones(mddev, &priv_conf);
546 550
@@ -583,7 +587,7 @@ static void *raid0_takeover_raid10(struct mddev *mddev)
583 mddev->degraded = 0; 587 mddev->degraded = 0;
584 /* make sure it will be not marked as dirty */ 588 /* make sure it will be not marked as dirty */
585 mddev->recovery_cp = MaxSector; 589 mddev->recovery_cp = MaxSector;
586 clear_bit(MD_FAILFAST_SUPPORTED, &mddev->flags); 590 mddev_clear_unsupported_flags(mddev, UNSUPPORTED_MDDEV_FLAGS);
587 591
588 create_strip_zones(mddev, &priv_conf); 592 create_strip_zones(mddev, &priv_conf);
589 return priv_conf; 593 return priv_conf;
@@ -626,7 +630,7 @@ static void *raid0_takeover_raid1(struct mddev *mddev)
626 mddev->raid_disks = 1; 630 mddev->raid_disks = 1;
627 /* make sure it will be not marked as dirty */ 631 /* make sure it will be not marked as dirty */
628 mddev->recovery_cp = MaxSector; 632 mddev->recovery_cp = MaxSector;
629 clear_bit(MD_FAILFAST_SUPPORTED, &mddev->flags); 633 mddev_clear_unsupported_flags(mddev, UNSUPPORTED_MDDEV_FLAGS);
630 634
631 create_strip_zones(mddev, &priv_conf); 635 create_strip_zones(mddev, &priv_conf);
632 return priv_conf; 636 return priv_conf;
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index a1f3fbed9100..7b0f647bcccb 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -42,6 +42,10 @@
42#include "raid1.h" 42#include "raid1.h"
43#include "bitmap.h" 43#include "bitmap.h"
44 44
45#define UNSUPPORTED_MDDEV_FLAGS \
46 ((1L << MD_HAS_JOURNAL) | \
47 (1L << MD_JOURNAL_CLEAN))
48
45/* 49/*
46 * Number of guaranteed r1bios in case of extreme VM load: 50 * Number of guaranteed r1bios in case of extreme VM load:
47 */ 51 */
@@ -1066,17 +1070,107 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
1066 kfree(plug); 1070 kfree(plug);
1067} 1071}
1068 1072
1069static void raid1_make_request(struct mddev *mddev, struct bio * bio) 1073static void raid1_read_request(struct mddev *mddev, struct bio *bio,
1074 struct r1bio *r1_bio)
1070{ 1075{
1071 struct r1conf *conf = mddev->private; 1076 struct r1conf *conf = mddev->private;
1072 struct raid1_info *mirror; 1077 struct raid1_info *mirror;
1073 struct r1bio *r1_bio;
1074 struct bio *read_bio; 1078 struct bio *read_bio;
1079 struct bitmap *bitmap = mddev->bitmap;
1080 const int op = bio_op(bio);
1081 const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
1082 int sectors_handled;
1083 int max_sectors;
1084 int rdisk;
1085
1086 wait_barrier(conf, bio);
1087
1088read_again:
1089 rdisk = read_balance(conf, r1_bio, &max_sectors);
1090
1091 if (rdisk < 0) {
1092 /* couldn't find anywhere to read from */
1093 raid_end_bio_io(r1_bio);
1094 return;
1095 }
1096 mirror = conf->mirrors + rdisk;
1097
1098 if (test_bit(WriteMostly, &mirror->rdev->flags) &&
1099 bitmap) {
1100 /*
1101 * Reading from a write-mostly device must take care not to
1102 * over-take any writes that are 'behind'
1103 */
1104 raid1_log(mddev, "wait behind writes");
1105 wait_event(bitmap->behind_wait,
1106 atomic_read(&bitmap->behind_writes) == 0);
1107 }
1108 r1_bio->read_disk = rdisk;
1109 r1_bio->start_next_window = 0;
1110
1111 read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
1112 bio_trim(read_bio, r1_bio->sector - bio->bi_iter.bi_sector,
1113 max_sectors);
1114
1115 r1_bio->bios[rdisk] = read_bio;
1116
1117 read_bio->bi_iter.bi_sector = r1_bio->sector +
1118 mirror->rdev->data_offset;
1119 read_bio->bi_bdev = mirror->rdev->bdev;
1120 read_bio->bi_end_io = raid1_end_read_request;
1121 bio_set_op_attrs(read_bio, op, do_sync);
1122 if (test_bit(FailFast, &mirror->rdev->flags) &&
1123 test_bit(R1BIO_FailFast, &r1_bio->state))
1124 read_bio->bi_opf |= MD_FAILFAST;
1125 read_bio->bi_private = r1_bio;
1126
1127 if (mddev->gendisk)
1128 trace_block_bio_remap(bdev_get_queue(read_bio->bi_bdev),
1129 read_bio, disk_devt(mddev->gendisk),
1130 r1_bio->sector);
1131
1132 if (max_sectors < r1_bio->sectors) {
1133 /*
1134 * could not read all from this device, so we will need another
1135 * r1_bio.
1136 */
1137 sectors_handled = (r1_bio->sector + max_sectors
1138 - bio->bi_iter.bi_sector);
1139 r1_bio->sectors = max_sectors;
1140 spin_lock_irq(&conf->device_lock);
1141 if (bio->bi_phys_segments == 0)
1142 bio->bi_phys_segments = 2;
1143 else
1144 bio->bi_phys_segments++;
1145 spin_unlock_irq(&conf->device_lock);
1146
1147 /*
1148 * Cannot call generic_make_request directly as that will be
1149 * queued in __make_request and subsequent mempool_alloc might
1150 * block waiting for it. So hand bio over to raid1d.
1151 */
1152 reschedule_retry(r1_bio);
1153
1154 r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
1155
1156 r1_bio->master_bio = bio;
1157 r1_bio->sectors = bio_sectors(bio) - sectors_handled;
1158 r1_bio->state = 0;
1159 r1_bio->mddev = mddev;
1160 r1_bio->sector = bio->bi_iter.bi_sector + sectors_handled;
1161 goto read_again;
1162 } else
1163 generic_make_request(read_bio);
1164}
1165
1166static void raid1_write_request(struct mddev *mddev, struct bio *bio,
1167 struct r1bio *r1_bio)
1168{
1169 struct r1conf *conf = mddev->private;
1075 int i, disks; 1170 int i, disks;
1076 struct bitmap *bitmap; 1171 struct bitmap *bitmap = mddev->bitmap;
1077 unsigned long flags; 1172 unsigned long flags;
1078 const int op = bio_op(bio); 1173 const int op = bio_op(bio);
1079 const int rw = bio_data_dir(bio);
1080 const unsigned long do_sync = (bio->bi_opf & REQ_SYNC); 1174 const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
1081 const unsigned long do_flush_fua = (bio->bi_opf & 1175 const unsigned long do_flush_fua = (bio->bi_opf &
1082 (REQ_PREFLUSH | REQ_FUA)); 1176 (REQ_PREFLUSH | REQ_FUA));
@@ -1096,15 +1190,15 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio)
1096 1190
1097 md_write_start(mddev, bio); /* wait on superblock update early */ 1191 md_write_start(mddev, bio); /* wait on superblock update early */
1098 1192
1099 if (bio_data_dir(bio) == WRITE && 1193 if ((bio_end_sector(bio) > mddev->suspend_lo &&
1100 ((bio_end_sector(bio) > mddev->suspend_lo &&
1101 bio->bi_iter.bi_sector < mddev->suspend_hi) || 1194 bio->bi_iter.bi_sector < mddev->suspend_hi) ||
1102 (mddev_is_clustered(mddev) && 1195 (mddev_is_clustered(mddev) &&
1103 md_cluster_ops->area_resyncing(mddev, WRITE, 1196 md_cluster_ops->area_resyncing(mddev, WRITE,
1104 bio->bi_iter.bi_sector, bio_end_sector(bio))))) { 1197 bio->bi_iter.bi_sector, bio_end_sector(bio)))) {
1105 /* As the suspend_* range is controlled by 1198
1106 * userspace, we want an interruptible 1199 /*
1107 * wait. 1200 * As the suspend_* range is controlled by userspace, we want
1201 * an interruptible wait.
1108 */ 1202 */
1109 DEFINE_WAIT(w); 1203 DEFINE_WAIT(w);
1110 for (;;) { 1204 for (;;) {
@@ -1115,128 +1209,15 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio)
1115 bio->bi_iter.bi_sector >= mddev->suspend_hi || 1209 bio->bi_iter.bi_sector >= mddev->suspend_hi ||
1116 (mddev_is_clustered(mddev) && 1210 (mddev_is_clustered(mddev) &&
1117 !md_cluster_ops->area_resyncing(mddev, WRITE, 1211 !md_cluster_ops->area_resyncing(mddev, WRITE,
1118 bio->bi_iter.bi_sector, bio_end_sector(bio)))) 1212 bio->bi_iter.bi_sector,
1213 bio_end_sector(bio))))
1119 break; 1214 break;
1120 schedule(); 1215 schedule();
1121 } 1216 }
1122 finish_wait(&conf->wait_barrier, &w); 1217 finish_wait(&conf->wait_barrier, &w);
1123 } 1218 }
1124
1125 start_next_window = wait_barrier(conf, bio); 1219 start_next_window = wait_barrier(conf, bio);
1126 1220
1127 bitmap = mddev->bitmap;
1128
1129 /*
1130 * make_request() can abort the operation when read-ahead is being
1131 * used and no empty request is available.
1132 *
1133 */
1134 r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
1135
1136 r1_bio->master_bio = bio;
1137 r1_bio->sectors = bio_sectors(bio);
1138 r1_bio->state = 0;
1139 r1_bio->mddev = mddev;
1140 r1_bio->sector = bio->bi_iter.bi_sector;
1141
1142 /* We might need to issue multiple reads to different
1143 * devices if there are bad blocks around, so we keep
1144 * track of the number of reads in bio->bi_phys_segments.
1145 * If this is 0, there is only one r1_bio and no locking
1146 * will be needed when requests complete. If it is
1147 * non-zero, then it is the number of not-completed requests.
1148 */
1149 bio->bi_phys_segments = 0;
1150 bio_clear_flag(bio, BIO_SEG_VALID);
1151
1152 if (rw == READ) {
1153 /*
1154 * read balancing logic:
1155 */
1156 int rdisk;
1157
1158read_again:
1159 rdisk = read_balance(conf, r1_bio, &max_sectors);
1160
1161 if (rdisk < 0) {
1162 /* couldn't find anywhere to read from */
1163 raid_end_bio_io(r1_bio);
1164 return;
1165 }
1166 mirror = conf->mirrors + rdisk;
1167
1168 if (test_bit(WriteMostly, &mirror->rdev->flags) &&
1169 bitmap) {
1170 /* Reading from a write-mostly device must
1171 * take care not to over-take any writes
1172 * that are 'behind'
1173 */
1174 raid1_log(mddev, "wait behind writes");
1175 wait_event(bitmap->behind_wait,
1176 atomic_read(&bitmap->behind_writes) == 0);
1177 }
1178 r1_bio->read_disk = rdisk;
1179 r1_bio->start_next_window = 0;
1180
1181 read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
1182 bio_trim(read_bio, r1_bio->sector - bio->bi_iter.bi_sector,
1183 max_sectors);
1184
1185 r1_bio->bios[rdisk] = read_bio;
1186
1187 read_bio->bi_iter.bi_sector = r1_bio->sector +
1188 mirror->rdev->data_offset;
1189 read_bio->bi_bdev = mirror->rdev->bdev;
1190 read_bio->bi_end_io = raid1_end_read_request;
1191 bio_set_op_attrs(read_bio, op, do_sync);
1192 if (test_bit(FailFast, &mirror->rdev->flags) &&
1193 test_bit(R1BIO_FailFast, &r1_bio->state))
1194 read_bio->bi_opf |= MD_FAILFAST;
1195 read_bio->bi_private = r1_bio;
1196
1197 if (mddev->gendisk)
1198 trace_block_bio_remap(bdev_get_queue(read_bio->bi_bdev),
1199 read_bio, disk_devt(mddev->gendisk),
1200 r1_bio->sector);
1201
1202 if (max_sectors < r1_bio->sectors) {
1203 /* could not read all from this device, so we will
1204 * need another r1_bio.
1205 */
1206
1207 sectors_handled = (r1_bio->sector + max_sectors
1208 - bio->bi_iter.bi_sector);
1209 r1_bio->sectors = max_sectors;
1210 spin_lock_irq(&conf->device_lock);
1211 if (bio->bi_phys_segments == 0)
1212 bio->bi_phys_segments = 2;
1213 else
1214 bio->bi_phys_segments++;
1215 spin_unlock_irq(&conf->device_lock);
1216 /* Cannot call generic_make_request directly
1217 * as that will be queued in __make_request
1218 * and subsequent mempool_alloc might block waiting
1219 * for it. So hand bio over to raid1d.
1220 */
1221 reschedule_retry(r1_bio);
1222
1223 r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
1224
1225 r1_bio->master_bio = bio;
1226 r1_bio->sectors = bio_sectors(bio) - sectors_handled;
1227 r1_bio->state = 0;
1228 r1_bio->mddev = mddev;
1229 r1_bio->sector = bio->bi_iter.bi_sector +
1230 sectors_handled;
1231 goto read_again;
1232 } else
1233 generic_make_request(read_bio);
1234 return;
1235 }
1236
1237 /*
1238 * WRITE:
1239 */
1240 if (conf->pending_count >= max_queued_requests) { 1221 if (conf->pending_count >= max_queued_requests) {
1241 md_wakeup_thread(mddev->thread); 1222 md_wakeup_thread(mddev->thread);
1242 raid1_log(mddev, "wait queued"); 1223 raid1_log(mddev, "wait queued");
@@ -1280,8 +1261,7 @@ read_again:
1280 int bad_sectors; 1261 int bad_sectors;
1281 int is_bad; 1262 int is_bad;
1282 1263
1283 is_bad = is_badblock(rdev, r1_bio->sector, 1264 is_bad = is_badblock(rdev, r1_bio->sector, max_sectors,
1284 max_sectors,
1285 &first_bad, &bad_sectors); 1265 &first_bad, &bad_sectors);
1286 if (is_bad < 0) { 1266 if (is_bad < 0) {
1287 /* mustn't write here until the bad block is 1267 /* mustn't write here until the bad block is
@@ -1370,7 +1350,8 @@ read_again:
1370 continue; 1350 continue;
1371 1351
1372 mbio = bio_clone_mddev(bio, GFP_NOIO, mddev); 1352 mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
1373 bio_trim(mbio, r1_bio->sector - bio->bi_iter.bi_sector, max_sectors); 1353 bio_trim(mbio, r1_bio->sector - bio->bi_iter.bi_sector,
1354 max_sectors);
1374 1355
1375 if (first_clone) { 1356 if (first_clone) {
1376 /* do behind I/O ? 1357 /* do behind I/O ?
@@ -1464,6 +1445,40 @@ read_again:
1464 wake_up(&conf->wait_barrier); 1445 wake_up(&conf->wait_barrier);
1465} 1446}
1466 1447
1448static void raid1_make_request(struct mddev *mddev, struct bio *bio)
1449{
1450 struct r1conf *conf = mddev->private;
1451 struct r1bio *r1_bio;
1452
1453 /*
1454 * make_request() can abort the operation when read-ahead is being
1455 * used and no empty request is available.
1456 *
1457 */
1458 r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
1459
1460 r1_bio->master_bio = bio;
1461 r1_bio->sectors = bio_sectors(bio);
1462 r1_bio->state = 0;
1463 r1_bio->mddev = mddev;
1464 r1_bio->sector = bio->bi_iter.bi_sector;
1465
1466 /*
1467 * We might need to issue multiple reads to different devices if there
1468 * are bad blocks around, so we keep track of the number of reads in
1469 * bio->bi_phys_segments. If this is 0, there is only one r1_bio and
1470 * no locking will be needed when requests complete. If it is
1471 * non-zero, then it is the number of not-completed requests.
1472 */
1473 bio->bi_phys_segments = 0;
1474 bio_clear_flag(bio, BIO_SEG_VALID);
1475
1476 if (bio_data_dir(bio) == READ)
1477 raid1_read_request(mddev, bio, r1_bio);
1478 else
1479 raid1_write_request(mddev, bio, r1_bio);
1480}
1481
1467static void raid1_status(struct seq_file *seq, struct mddev *mddev) 1482static void raid1_status(struct seq_file *seq, struct mddev *mddev)
1468{ 1483{
1469 struct r1conf *conf = mddev->private; 1484 struct r1conf *conf = mddev->private;
@@ -3246,8 +3261,8 @@ static void *raid1_takeover(struct mddev *mddev)
3246 if (!IS_ERR(conf)) { 3261 if (!IS_ERR(conf)) {
3247 /* Array must appear to be quiesced */ 3262 /* Array must appear to be quiesced */
3248 conf->array_frozen = 1; 3263 conf->array_frozen = 1;
3249 clear_bit(MD_HAS_JOURNAL, &mddev->flags); 3264 mddev_clear_unsupported_flags(mddev,
3250 clear_bit(MD_JOURNAL_CLEAN, &mddev->flags); 3265 UNSUPPORTED_MDDEV_FLAGS);
3251 } 3266 }
3252 return conf; 3267 return conf;
3253 } 3268 }
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index ab5e86209322..1920756828df 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1087,23 +1087,122 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
1087 kfree(plug); 1087 kfree(plug);
1088} 1088}
1089 1089
1090static void __make_request(struct mddev *mddev, struct bio *bio) 1090static void raid10_read_request(struct mddev *mddev, struct bio *bio,
1091 struct r10bio *r10_bio)
1091{ 1092{
1092 struct r10conf *conf = mddev->private; 1093 struct r10conf *conf = mddev->private;
1093 struct r10bio *r10_bio;
1094 struct bio *read_bio; 1094 struct bio *read_bio;
1095 const int op = bio_op(bio);
1096 const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
1097 int sectors_handled;
1098 int max_sectors;
1099 sector_t sectors;
1100 struct md_rdev *rdev;
1101 int slot;
1102
1103 /*
1104 * Register the new request and wait if the reconstruction
1105 * thread has put up a bar for new requests.
1106 * Continue immediately if no resync is active currently.
1107 */
1108 wait_barrier(conf);
1109
1110 sectors = bio_sectors(bio);
1111 while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
1112 bio->bi_iter.bi_sector < conf->reshape_progress &&
1113 bio->bi_iter.bi_sector + sectors > conf->reshape_progress) {
1114 /*
1115 * IO spans the reshape position. Need to wait for reshape to
1116 * pass
1117 */
1118 raid10_log(conf->mddev, "wait reshape");
1119 allow_barrier(conf);
1120 wait_event(conf->wait_barrier,
1121 conf->reshape_progress <= bio->bi_iter.bi_sector ||
1122 conf->reshape_progress >= bio->bi_iter.bi_sector +
1123 sectors);
1124 wait_barrier(conf);
1125 }
1126
1127read_again:
1128 rdev = read_balance(conf, r10_bio, &max_sectors);
1129 if (!rdev) {
1130 raid_end_bio_io(r10_bio);
1131 return;
1132 }
1133 slot = r10_bio->read_slot;
1134
1135 read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
1136 bio_trim(read_bio, r10_bio->sector - bio->bi_iter.bi_sector,
1137 max_sectors);
1138
1139 r10_bio->devs[slot].bio = read_bio;
1140 r10_bio->devs[slot].rdev = rdev;
1141
1142 read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr +
1143 choose_data_offset(r10_bio, rdev);
1144 read_bio->bi_bdev = rdev->bdev;
1145 read_bio->bi_end_io = raid10_end_read_request;
1146 bio_set_op_attrs(read_bio, op, do_sync);
1147 if (test_bit(FailFast, &rdev->flags) &&
1148 test_bit(R10BIO_FailFast, &r10_bio->state))
1149 read_bio->bi_opf |= MD_FAILFAST;
1150 read_bio->bi_private = r10_bio;
1151
1152 if (mddev->gendisk)
1153 trace_block_bio_remap(bdev_get_queue(read_bio->bi_bdev),
1154 read_bio, disk_devt(mddev->gendisk),
1155 r10_bio->sector);
1156 if (max_sectors < r10_bio->sectors) {
1157 /*
1158 * Could not read all from this device, so we will need another
1159 * r10_bio.
1160 */
1161 sectors_handled = (r10_bio->sector + max_sectors
1162 - bio->bi_iter.bi_sector);
1163 r10_bio->sectors = max_sectors;
1164 spin_lock_irq(&conf->device_lock);
1165 if (bio->bi_phys_segments == 0)
1166 bio->bi_phys_segments = 2;
1167 else
1168 bio->bi_phys_segments++;
1169 spin_unlock_irq(&conf->device_lock);
1170 /*
1171 * Cannot call generic_make_request directly as that will be
1172 * queued in __generic_make_request and subsequent
1173 * mempool_alloc might block waiting for it. so hand bio over
1174 * to raid10d.
1175 */
1176 reschedule_retry(r10_bio);
1177
1178 r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
1179
1180 r10_bio->master_bio = bio;
1181 r10_bio->sectors = bio_sectors(bio) - sectors_handled;
1182 r10_bio->state = 0;
1183 r10_bio->mddev = mddev;
1184 r10_bio->sector = bio->bi_iter.bi_sector + sectors_handled;
1185 goto read_again;
1186 } else
1187 generic_make_request(read_bio);
1188 return;
1189}
1190
1191static void raid10_write_request(struct mddev *mddev, struct bio *bio,
1192 struct r10bio *r10_bio)
1193{
1194 struct r10conf *conf = mddev->private;
1095 int i; 1195 int i;
1096 const int op = bio_op(bio); 1196 const int op = bio_op(bio);
1097 const int rw = bio_data_dir(bio);
1098 const unsigned long do_sync = (bio->bi_opf & REQ_SYNC); 1197 const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
1099 const unsigned long do_fua = (bio->bi_opf & REQ_FUA); 1198 const unsigned long do_fua = (bio->bi_opf & REQ_FUA);
1100 unsigned long flags; 1199 unsigned long flags;
1101 struct md_rdev *blocked_rdev; 1200 struct md_rdev *blocked_rdev;
1102 struct blk_plug_cb *cb; 1201 struct blk_plug_cb *cb;
1103 struct raid10_plug_cb *plug = NULL; 1202 struct raid10_plug_cb *plug = NULL;
1203 sector_t sectors;
1104 int sectors_handled; 1204 int sectors_handled;
1105 int max_sectors; 1205 int max_sectors;
1106 int sectors;
1107 1206
1108 md_write_start(mddev, bio); 1207 md_write_start(mddev, bio);
1109 1208
@@ -1118,8 +1217,9 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
1118 while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && 1217 while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
1119 bio->bi_iter.bi_sector < conf->reshape_progress && 1218 bio->bi_iter.bi_sector < conf->reshape_progress &&
1120 bio->bi_iter.bi_sector + sectors > conf->reshape_progress) { 1219 bio->bi_iter.bi_sector + sectors > conf->reshape_progress) {
1121 /* IO spans the reshape position. Need to wait for 1220 /*
1122 * reshape to pass 1221 * IO spans the reshape position. Need to wait for reshape to
1222 * pass
1123 */ 1223 */
1124 raid10_log(conf->mddev, "wait reshape"); 1224 raid10_log(conf->mddev, "wait reshape");
1125 allow_barrier(conf); 1225 allow_barrier(conf);
@@ -1129,8 +1229,8 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
1129 sectors); 1229 sectors);
1130 wait_barrier(conf); 1230 wait_barrier(conf);
1131 } 1231 }
1232
1132 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && 1233 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
1133 bio_data_dir(bio) == WRITE &&
1134 (mddev->reshape_backwards 1234 (mddev->reshape_backwards
1135 ? (bio->bi_iter.bi_sector < conf->reshape_safe && 1235 ? (bio->bi_iter.bi_sector < conf->reshape_safe &&
1136 bio->bi_iter.bi_sector + sectors > conf->reshape_progress) 1236 bio->bi_iter.bi_sector + sectors > conf->reshape_progress)
@@ -1148,98 +1248,6 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
1148 conf->reshape_safe = mddev->reshape_position; 1248 conf->reshape_safe = mddev->reshape_position;
1149 } 1249 }
1150 1250
1151 r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
1152
1153 r10_bio->master_bio = bio;
1154 r10_bio->sectors = sectors;
1155
1156 r10_bio->mddev = mddev;
1157 r10_bio->sector = bio->bi_iter.bi_sector;
1158 r10_bio->state = 0;
1159
1160 /* We might need to issue multiple reads to different
1161 * devices if there are bad blocks around, so we keep
1162 * track of the number of reads in bio->bi_phys_segments.
1163 * If this is 0, there is only one r10_bio and no locking
1164 * will be needed when the request completes. If it is
1165 * non-zero, then it is the number of not-completed requests.
1166 */
1167 bio->bi_phys_segments = 0;
1168 bio_clear_flag(bio, BIO_SEG_VALID);
1169
1170 if (rw == READ) {
1171 /*
1172 * read balancing logic:
1173 */
1174 struct md_rdev *rdev;
1175 int slot;
1176
1177read_again:
1178 rdev = read_balance(conf, r10_bio, &max_sectors);
1179 if (!rdev) {
1180 raid_end_bio_io(r10_bio);
1181 return;
1182 }
1183 slot = r10_bio->read_slot;
1184
1185 read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
1186 bio_trim(read_bio, r10_bio->sector - bio->bi_iter.bi_sector,
1187 max_sectors);
1188
1189 r10_bio->devs[slot].bio = read_bio;
1190 r10_bio->devs[slot].rdev = rdev;
1191
1192 read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr +
1193 choose_data_offset(r10_bio, rdev);
1194 read_bio->bi_bdev = rdev->bdev;
1195 read_bio->bi_end_io = raid10_end_read_request;
1196 bio_set_op_attrs(read_bio, op, do_sync);
1197 if (test_bit(FailFast, &rdev->flags) &&
1198 test_bit(R10BIO_FailFast, &r10_bio->state))
1199 read_bio->bi_opf |= MD_FAILFAST;
1200 read_bio->bi_private = r10_bio;
1201
1202 if (mddev->gendisk)
1203 trace_block_bio_remap(bdev_get_queue(read_bio->bi_bdev),
1204 read_bio, disk_devt(mddev->gendisk),
1205 r10_bio->sector);
1206 if (max_sectors < r10_bio->sectors) {
1207 /* Could not read all from this device, so we will
1208 * need another r10_bio.
1209 */
1210 sectors_handled = (r10_bio->sector + max_sectors
1211 - bio->bi_iter.bi_sector);
1212 r10_bio->sectors = max_sectors;
1213 spin_lock_irq(&conf->device_lock);
1214 if (bio->bi_phys_segments == 0)
1215 bio->bi_phys_segments = 2;
1216 else
1217 bio->bi_phys_segments++;
1218 spin_unlock_irq(&conf->device_lock);
1219 /* Cannot call generic_make_request directly
1220 * as that will be queued in __generic_make_request
1221 * and subsequent mempool_alloc might block
1222 * waiting for it. so hand bio over to raid10d.
1223 */
1224 reschedule_retry(r10_bio);
1225
1226 r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
1227
1228 r10_bio->master_bio = bio;
1229 r10_bio->sectors = bio_sectors(bio) - sectors_handled;
1230 r10_bio->state = 0;
1231 r10_bio->mddev = mddev;
1232 r10_bio->sector = bio->bi_iter.bi_sector +
1233 sectors_handled;
1234 goto read_again;
1235 } else
1236 generic_make_request(read_bio);
1237 return;
1238 }
1239
1240 /*
1241 * WRITE:
1242 */
1243 if (conf->pending_count >= max_queued_requests) { 1251 if (conf->pending_count >= max_queued_requests) {
1244 md_wakeup_thread(mddev->thread); 1252 md_wakeup_thread(mddev->thread);
1245 raid10_log(mddev, "wait queued"); 1253 raid10_log(mddev, "wait queued");
@@ -1300,8 +1308,7 @@ retry_write:
1300 int bad_sectors; 1308 int bad_sectors;
1301 int is_bad; 1309 int is_bad;
1302 1310
1303 is_bad = is_badblock(rdev, dev_sector, 1311 is_bad = is_badblock(rdev, dev_sector, max_sectors,
1304 max_sectors,
1305 &first_bad, &bad_sectors); 1312 &first_bad, &bad_sectors);
1306 if (is_bad < 0) { 1313 if (is_bad < 0) {
1307 /* Mustn't write here until the bad block 1314 /* Mustn't write here until the bad block
@@ -1405,8 +1412,7 @@ retry_write:
1405 r10_bio->devs[i].bio = mbio; 1412 r10_bio->devs[i].bio = mbio;
1406 1413
1407 mbio->bi_iter.bi_sector = (r10_bio->devs[i].addr+ 1414 mbio->bi_iter.bi_sector = (r10_bio->devs[i].addr+
1408 choose_data_offset(r10_bio, 1415 choose_data_offset(r10_bio, rdev));
1409 rdev));
1410 mbio->bi_bdev = rdev->bdev; 1416 mbio->bi_bdev = rdev->bdev;
1411 mbio->bi_end_io = raid10_end_write_request; 1417 mbio->bi_end_io = raid10_end_write_request;
1412 bio_set_op_attrs(mbio, op, do_sync | do_fua); 1418 bio_set_op_attrs(mbio, op, do_sync | do_fua);
@@ -1457,8 +1463,7 @@ retry_write:
1457 r10_bio->devs[i].repl_bio = mbio; 1463 r10_bio->devs[i].repl_bio = mbio;
1458 1464
1459 mbio->bi_iter.bi_sector = (r10_bio->devs[i].addr + 1465 mbio->bi_iter.bi_sector = (r10_bio->devs[i].addr +
1460 choose_data_offset( 1466 choose_data_offset(r10_bio, rdev));
1461 r10_bio, rdev));
1462 mbio->bi_bdev = rdev->bdev; 1467 mbio->bi_bdev = rdev->bdev;
1463 mbio->bi_end_io = raid10_end_write_request; 1468 mbio->bi_end_io = raid10_end_write_request;
1464 bio_set_op_attrs(mbio, op, do_sync | do_fua); 1469 bio_set_op_attrs(mbio, op, do_sync | do_fua);
@@ -1503,6 +1508,36 @@ retry_write:
1503 one_write_done(r10_bio); 1508 one_write_done(r10_bio);
1504} 1509}
1505 1510
1511static void __make_request(struct mddev *mddev, struct bio *bio)
1512{
1513 struct r10conf *conf = mddev->private;
1514 struct r10bio *r10_bio;
1515
1516 r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
1517
1518 r10_bio->master_bio = bio;
1519 r10_bio->sectors = bio_sectors(bio);
1520
1521 r10_bio->mddev = mddev;
1522 r10_bio->sector = bio->bi_iter.bi_sector;
1523 r10_bio->state = 0;
1524
1525 /*
1526 * We might need to issue multiple reads to different devices if there
1527 * are bad blocks around, so we keep track of the number of reads in
1528 * bio->bi_phys_segments. If this is 0, there is only one r10_bio and
1529 * no locking will be needed when the request completes. If it is
1530 * non-zero, then it is the number of not-completed requests.
1531 */
1532 bio->bi_phys_segments = 0;
1533 bio_clear_flag(bio, BIO_SEG_VALID);
1534
1535 if (bio_data_dir(bio) == READ)
1536 raid10_read_request(mddev, bio, r10_bio);
1537 else
1538 raid10_write_request(mddev, bio, r10_bio);
1539}
1540
1506static void raid10_make_request(struct mddev *mddev, struct bio *bio) 1541static void raid10_make_request(struct mddev *mddev, struct bio *bio)
1507{ 1542{
1508 struct r10conf *conf = mddev->private; 1543 struct r10conf *conf = mddev->private;
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index d7bfb6fc8aef..0e8ed2c327b0 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -1682,8 +1682,7 @@ out:
1682 1682
1683static struct stripe_head * 1683static struct stripe_head *
1684r5c_recovery_alloc_stripe(struct r5conf *conf, 1684r5c_recovery_alloc_stripe(struct r5conf *conf,
1685 sector_t stripe_sect, 1685 sector_t stripe_sect)
1686 sector_t log_start)
1687{ 1686{
1688 struct stripe_head *sh; 1687 struct stripe_head *sh;
1689 1688
@@ -1692,7 +1691,6 @@ r5c_recovery_alloc_stripe(struct r5conf *conf,
1692 return NULL; /* no more stripe available */ 1691 return NULL; /* no more stripe available */
1693 1692
1694 r5l_recovery_reset_stripe(sh); 1693 r5l_recovery_reset_stripe(sh);
1695 sh->log_start = log_start;
1696 1694
1697 return sh; 1695 return sh;
1698} 1696}
@@ -1862,7 +1860,7 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log,
1862 stripe_sect); 1860 stripe_sect);
1863 1861
1864 if (!sh) { 1862 if (!sh) {
1865 sh = r5c_recovery_alloc_stripe(conf, stripe_sect, ctx->pos); 1863 sh = r5c_recovery_alloc_stripe(conf, stripe_sect);
1866 /* 1864 /*
1867 * cannot get stripe from raid5_get_active_stripe 1865 * cannot get stripe from raid5_get_active_stripe
1868 * try replay some stripes 1866 * try replay some stripes
@@ -1871,7 +1869,7 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log,
1871 r5c_recovery_replay_stripes( 1869 r5c_recovery_replay_stripes(
1872 cached_stripe_list, ctx); 1870 cached_stripe_list, ctx);
1873 sh = r5c_recovery_alloc_stripe( 1871 sh = r5c_recovery_alloc_stripe(
1874 conf, stripe_sect, ctx->pos); 1872 conf, stripe_sect);
1875 } 1873 }
1876 if (!sh) { 1874 if (!sh) {
1877 pr_debug("md/raid:%s: Increasing stripe cache size to %d to recovery data on journal.\n", 1875 pr_debug("md/raid:%s: Increasing stripe cache size to %d to recovery data on journal.\n",
@@ -1879,8 +1877,8 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log,
1879 conf->min_nr_stripes * 2); 1877 conf->min_nr_stripes * 2);
1880 raid5_set_cache_size(mddev, 1878 raid5_set_cache_size(mddev,
1881 conf->min_nr_stripes * 2); 1879 conf->min_nr_stripes * 2);
1882 sh = r5c_recovery_alloc_stripe( 1880 sh = r5c_recovery_alloc_stripe(conf,
1883 conf, stripe_sect, ctx->pos); 1881 stripe_sect);
1884 } 1882 }
1885 if (!sh) { 1883 if (!sh) {
1886 pr_err("md/raid:%s: Cannot get enough stripes due to memory pressure. Recovery failed.\n", 1884 pr_err("md/raid:%s: Cannot get enough stripes due to memory pressure. Recovery failed.\n",
@@ -1894,7 +1892,6 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log,
1894 if (!test_bit(STRIPE_R5C_CACHING, &sh->state) && 1892 if (!test_bit(STRIPE_R5C_CACHING, &sh->state) &&
1895 test_bit(R5_Wantwrite, &sh->dev[sh->pd_idx].flags)) { 1893 test_bit(R5_Wantwrite, &sh->dev[sh->pd_idx].flags)) {
1896 r5l_recovery_replay_one_stripe(conf, sh, ctx); 1894 r5l_recovery_replay_one_stripe(conf, sh, ctx);
1897 sh->log_start = ctx->pos;
1898 list_move_tail(&sh->lru, cached_stripe_list); 1895 list_move_tail(&sh->lru, cached_stripe_list);
1899 } 1896 }
1900 r5l_recovery_load_data(log, sh, ctx, payload, 1897 r5l_recovery_load_data(log, sh, ctx, payload,
@@ -1933,8 +1930,6 @@ static void r5c_recovery_load_one_stripe(struct r5l_log *log,
1933 set_bit(R5_UPTODATE, &dev->flags); 1930 set_bit(R5_UPTODATE, &dev->flags);
1934 } 1931 }
1935 } 1932 }
1936 list_add_tail(&sh->r5c, &log->stripe_in_journal_list);
1937 atomic_inc(&log->stripe_in_journal_count);
1938} 1933}
1939 1934
1940/* 1935/*
@@ -2070,6 +2065,7 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
2070 struct stripe_head *sh, *next; 2065 struct stripe_head *sh, *next;
2071 struct mddev *mddev = log->rdev->mddev; 2066 struct mddev *mddev = log->rdev->mddev;
2072 struct page *page; 2067 struct page *page;
2068 sector_t next_checkpoint = MaxSector;
2073 2069
2074 page = alloc_page(GFP_KERNEL); 2070 page = alloc_page(GFP_KERNEL);
2075 if (!page) { 2071 if (!page) {
@@ -2078,6 +2074,8 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
2078 return -ENOMEM; 2074 return -ENOMEM;
2079 } 2075 }
2080 2076
2077 WARN_ON(list_empty(&ctx->cached_list));
2078
2081 list_for_each_entry_safe(sh, next, &ctx->cached_list, lru) { 2079 list_for_each_entry_safe(sh, next, &ctx->cached_list, lru) {
2082 struct r5l_meta_block *mb; 2080 struct r5l_meta_block *mb;
2083 int i; 2081 int i;
@@ -2123,12 +2121,15 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
2123 sync_page_io(log->rdev, ctx->pos, PAGE_SIZE, page, 2121 sync_page_io(log->rdev, ctx->pos, PAGE_SIZE, page,
2124 REQ_OP_WRITE, REQ_FUA, false); 2122 REQ_OP_WRITE, REQ_FUA, false);
2125 sh->log_start = ctx->pos; 2123 sh->log_start = ctx->pos;
2124 list_add_tail(&sh->r5c, &log->stripe_in_journal_list);
2125 atomic_inc(&log->stripe_in_journal_count);
2126 ctx->pos = write_pos; 2126 ctx->pos = write_pos;
2127 ctx->seq += 1; 2127 ctx->seq += 1;
2128 2128 next_checkpoint = sh->log_start;
2129 list_del_init(&sh->lru); 2129 list_del_init(&sh->lru);
2130 raid5_release_stripe(sh); 2130 raid5_release_stripe(sh);
2131 } 2131 }
2132 log->next_checkpoint = next_checkpoint;
2132 __free_page(page); 2133 __free_page(page);
2133 return 0; 2134 return 0;
2134} 2135}
@@ -2139,7 +2140,6 @@ static int r5l_recovery_log(struct r5l_log *log)
2139 struct r5l_recovery_ctx ctx; 2140 struct r5l_recovery_ctx ctx;
2140 int ret; 2141 int ret;
2141 sector_t pos; 2142 sector_t pos;
2142 struct stripe_head *sh;
2143 2143
2144 ctx.pos = log->last_checkpoint; 2144 ctx.pos = log->last_checkpoint;
2145 ctx.seq = log->last_cp_seq; 2145 ctx.seq = log->last_cp_seq;
@@ -2164,16 +2164,13 @@ static int r5l_recovery_log(struct r5l_log *log)
2164 log->next_checkpoint = ctx.pos; 2164 log->next_checkpoint = ctx.pos;
2165 r5l_log_write_empty_meta_block(log, ctx.pos, ctx.seq++); 2165 r5l_log_write_empty_meta_block(log, ctx.pos, ctx.seq++);
2166 ctx.pos = r5l_ring_add(log, ctx.pos, BLOCK_SECTORS); 2166 ctx.pos = r5l_ring_add(log, ctx.pos, BLOCK_SECTORS);
2167 } else {
2168 sh = list_last_entry(&ctx.cached_list, struct stripe_head, lru);
2169 log->next_checkpoint = sh->log_start;
2170 } 2167 }
2171 2168
2172 if ((ctx.data_only_stripes == 0) && (ctx.data_parity_stripes == 0)) 2169 if ((ctx.data_only_stripes == 0) && (ctx.data_parity_stripes == 0))
2173 pr_debug("md/raid:%s: starting from clean shutdown\n", 2170 pr_debug("md/raid:%s: starting from clean shutdown\n",
2174 mdname(mddev)); 2171 mdname(mddev));
2175 else { 2172 else {
2176 pr_debug("md/raid:%s: recoverying %d data-only stripes and %d data-parity stripes\n", 2173 pr_debug("md/raid:%s: recovering %d data-only stripes and %d data-parity stripes\n",
2177 mdname(mddev), ctx.data_only_stripes, 2174 mdname(mddev), ctx.data_only_stripes,
2178 ctx.data_parity_stripes); 2175 ctx.data_parity_stripes);
2179 2176
@@ -2418,9 +2415,6 @@ void r5c_finish_stripe_write_out(struct r5conf *conf,
2418 if (do_wakeup) 2415 if (do_wakeup)
2419 wake_up(&conf->wait_for_overlap); 2416 wake_up(&conf->wait_for_overlap);
2420 2417
2421 if (conf->log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_THROUGH)
2422 return;
2423
2424 spin_lock_irq(&conf->log->stripe_in_journal_lock); 2418 spin_lock_irq(&conf->log->stripe_in_journal_lock);
2425 list_del_init(&sh->r5c); 2419 list_del_init(&sh->r5c);
2426 spin_unlock_irq(&conf->log->stripe_in_journal_lock); 2420 spin_unlock_irq(&conf->log->stripe_in_journal_lock);
@@ -2639,14 +2633,16 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
2639 spin_lock_init(&log->stripe_in_journal_lock); 2633 spin_lock_init(&log->stripe_in_journal_lock);
2640 atomic_set(&log->stripe_in_journal_count, 0); 2634 atomic_set(&log->stripe_in_journal_count, 0);
2641 2635
2636 rcu_assign_pointer(conf->log, log);
2637
2642 if (r5l_load_log(log)) 2638 if (r5l_load_log(log))
2643 goto error; 2639 goto error;
2644 2640
2645 rcu_assign_pointer(conf->log, log);
2646 set_bit(MD_HAS_JOURNAL, &conf->mddev->flags); 2641 set_bit(MD_HAS_JOURNAL, &conf->mddev->flags);
2647 return 0; 2642 return 0;
2648 2643
2649error: 2644error:
2645 rcu_assign_pointer(conf->log, NULL);
2650 md_unregister_thread(&log->reclaim_thread); 2646 md_unregister_thread(&log->reclaim_thread);
2651reclaim_thread: 2647reclaim_thread:
2652 mempool_destroy(log->meta_pool); 2648 mempool_destroy(log->meta_pool);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 06d7279bdd04..36c13e4be9c9 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -62,6 +62,8 @@
62#include "raid0.h" 62#include "raid0.h"
63#include "bitmap.h" 63#include "bitmap.h"
64 64
65#define UNSUPPORTED_MDDEV_FLAGS (1L << MD_FAILFAST_SUPPORTED)
66
65#define cpu_to_group(cpu) cpu_to_node(cpu) 67#define cpu_to_group(cpu) cpu_to_node(cpu)
66#define ANY_GROUP NUMA_NO_NODE 68#define ANY_GROUP NUMA_NO_NODE
67 69
@@ -7829,8 +7831,9 @@ static void *raid5_takeover_raid1(struct mddev *mddev)
7829 mddev->new_chunk_sectors = chunksect; 7831 mddev->new_chunk_sectors = chunksect;
7830 7832
7831 ret = setup_conf(mddev); 7833 ret = setup_conf(mddev);
7832 if (!IS_ERR_VALUE(ret)) 7834 if (!IS_ERR(ret))
7833 clear_bit(MD_FAILFAST_SUPPORTED, &mddev->flags); 7835 mddev_clear_unsupported_flags(mddev,
7836 UNSUPPORTED_MDDEV_FLAGS);
7834 return ret; 7837 return ret;
7835} 7838}
7836 7839
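
The raid5.c hunk above ("md/raid5: Use correct IS_ERR() variation on pointer check") swaps IS_ERR_VALUE() for IS_ERR(): setup_conf() reports failure through an ERR_PTR()-encoded pointer, and IS_ERR_VALUE() is intended for unsigned long return values rather than pointers. The following is a minimal kernel-style sketch of the pointer-based error pattern, using a hypothetical foo_setup() instead of the real raid5 code; it is an illustration, not buildable outside a kernel tree.

#include <linux/err.h>
#include <linux/slab.h>

struct foo_conf {
	int nr_disks;
};

/* Hypothetical constructor: returns a valid pointer or an ERR_PTR() code. */
static struct foo_conf *foo_setup(gfp_t gfp)
{
	struct foo_conf *conf = kzalloc(sizeof(*conf), gfp);

	if (!conf)
		return ERR_PTR(-ENOMEM);
	return conf;
}

static int foo_init(void)
{
	struct foo_conf *conf = foo_setup(GFP_KERNEL);

	/*
	 * IS_ERR() is the check for ERR_PTR()-encoded pointers;
	 * IS_ERR_VALUE() is meant for unsigned long values, which is
	 * why the raid5 takeover path was switched to IS_ERR() above.
	 */
	if (IS_ERR(conf))
		return PTR_ERR(conf);

	kfree(conf);
	return 0;
}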