author     NeilBrown <neilb@suse.com>                    2017-04-05 00:05:50 -0400
committer  Shaohua Li <shli@fb.com>                      2017-04-11 13:10:20 -0400
commit     689389a06ce79fdced85b5115717f71c71e623e0 (patch)
tree       beacf4e145c55913542aea4f83188bec206d9221 /drivers/md/raid1.c
parent     50512625da06c41517cb596f51b923ce15f401a4 (diff)
md/raid1: simplify handle_read_error().
handle_read_error() duplicates a lot of the work that raid1_read_request()
does, so it makes sense to just use that function.

This doesn't quite work as-is, because handle_read_error() relies on the same
r1bio being re-used so that, in the case of a read-only array, setting
IO_BLOCKED in r1bio->bios[] ensures read_balance() won't re-use that device.
So we need to allow an r1bio to be passed to raid1_read_request(), and to
have that function mostly initialise the r1bio, but leave the bios[] array
untouched.

Two parts of handle_read_error() need to be preserved: the two warning
messages it prints. They are conditionally added to raid1_read_request().

Note that this highlights a minor bug in alloc_r1bio(): it doesn't initialise
the bios[] array, so it is possible that old content is there, which might
cause read_balance() to ignore some devices for no good reason.

With this change, we no longer need inc_pending(), or the sectors_handled
arg to alloc_r1bio().

As handle_read_error() is called from raid1d() and allocates memory, there is
a tiny chance of a deadlock: all elements of the various pools could be queued
waiting for raid1 to handle them, and there may be no extra memory free.
Achieving guaranteed forward progress would probably require a second thread
and another mempool. Instead of that complexity, add __GFP_HIGH to any
allocation made when raid1_read_request() is called from raid1d.

Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
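To make the bios[] reuse concrete, here is a minimal user-space sketch of the
mechanism described above. It is not kernel code: RAID_DISKS, struct
r1bio_sketch and read_balance_sketch() are simplified stand-ins for the
kernel's r1bio, bios[] and read_balance(), and IO_BLOCKED here stands in for
the kernel's sentinel value. It shows why a fresh request needs a zeroed
bios[] array (the alloc_r1bio() fix), while the read-error retry must reuse
the same r1bio so that slots already marked IO_BLOCKED keep steering read
selection away from the failed device.

#include <stdio.h>
#include <string.h>

#define RAID_DISKS 4
#define IO_BLOCKED ((void *)1)  /* stand-in for the kernel's sentinel */

struct r1bio_sketch {
        void *bios[RAID_DISKS]; /* NULL = usable, IO_BLOCKED = skip */
};

/* Pick the first disk whose slot is not blocked; -1 if none remain. */
static int read_balance_sketch(struct r1bio_sketch *r1_bio)
{
        for (int d = 0; d < RAID_DISKS; d++)
                if (r1_bio->bios[d] != IO_BLOCKED)
                        return d;
        return -1;
}

int main(void)
{
        struct r1bio_sketch r1_bio;

        /* Fresh request: bios[] must be zeroed, otherwise stale
         * IO_BLOCKED entries left in the mempool element could make
         * read selection ignore healthy devices for no good reason. */
        memset(r1_bio.bios, 0, sizeof(r1_bio.bios));
        printf("fresh request reads from disk %d\n",
               read_balance_sketch(&r1_bio));

        /* Retry after a read error: disk 0 was marked IO_BLOCKED, and
         * reusing the same r1bio preserves that mark, so the retry is
         * routed to another mirror. */
        r1_bio.bios[0] = IO_BLOCKED;
        printf("retry reads from disk %d\n",
               read_balance_sketch(&r1_bio));
        return 0;
}

The __GFP_HIGH point needs no separate sketch: it is the single gfp_t line in
the patch below, chosen per call site so that a retry issued from raid1d()
may dip into emergency reserves rather than wait on the very thread it is
running in.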
Diffstat (limited to 'drivers/md/raid1.c')
-rw-r--r-- drivers/md/raid1.c | 140
 1 file changed, 60 insertions(+), 80 deletions(-)
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 18af00c86b42..29a9aa9254c3 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -988,16 +988,6 @@ static void wait_read_barrier(struct r1conf *conf, sector_t sector_nr)
 	spin_unlock_irq(&conf->resync_lock);
 }
 
-static void inc_pending(struct r1conf *conf, sector_t bi_sector)
-{
-	/* The current request requires multiple r1_bio, so
-	 * we need to increment the pending count, and the corresponding
-	 * window count.
-	 */
-	int idx = sector_to_idx(bi_sector);
-	atomic_inc(&conf->nr_pending[idx]);
-}
-
 static void wait_barrier(struct r1conf *conf, sector_t sector_nr)
 {
 	int idx = sector_to_idx(sector_nr);
@@ -1184,35 +1174,60 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
1184 kfree(plug); 1174 kfree(plug);
1185} 1175}
1186 1176
1177static void init_r1bio(struct r1bio *r1_bio, struct mddev *mddev, struct bio *bio)
1178{
1179 r1_bio->master_bio = bio;
1180 r1_bio->sectors = bio_sectors(bio);
1181 r1_bio->state = 0;
1182 r1_bio->mddev = mddev;
1183 r1_bio->sector = bio->bi_iter.bi_sector;
1184}
1185
1187static inline struct r1bio * 1186static inline struct r1bio *
1188alloc_r1bio(struct mddev *mddev, struct bio *bio, sector_t sectors_handled) 1187alloc_r1bio(struct mddev *mddev, struct bio *bio)
1189{ 1188{
1190 struct r1conf *conf = mddev->private; 1189 struct r1conf *conf = mddev->private;
1191 struct r1bio *r1_bio; 1190 struct r1bio *r1_bio;
1192 1191
1193 r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO); 1192 r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
1194 1193 /* Ensure no bio records IO_BLOCKED */
1195 r1_bio->master_bio = bio; 1194 memset(r1_bio->bios, 0, conf->raid_disks * sizeof(r1_bio->bios[0]));
1196 r1_bio->sectors = bio_sectors(bio) - sectors_handled; 1195 init_r1bio(r1_bio, mddev, bio);
1197 r1_bio->state = 0;
1198 r1_bio->mddev = mddev;
1199 r1_bio->sector = bio->bi_iter.bi_sector + sectors_handled;
1200
1201 return r1_bio; 1196 return r1_bio;
1202} 1197}
1203 1198
1204static void raid1_read_request(struct mddev *mddev, struct bio *bio, 1199static void raid1_read_request(struct mddev *mddev, struct bio *bio,
1205 int max_read_sectors) 1200 int max_read_sectors, struct r1bio *r1_bio)
1206{ 1201{
1207 struct r1conf *conf = mddev->private; 1202 struct r1conf *conf = mddev->private;
1208 struct raid1_info *mirror; 1203 struct raid1_info *mirror;
1209 struct r1bio *r1_bio;
1210 struct bio *read_bio; 1204 struct bio *read_bio;
1211 struct bitmap *bitmap = mddev->bitmap; 1205 struct bitmap *bitmap = mddev->bitmap;
1212 const int op = bio_op(bio); 1206 const int op = bio_op(bio);
1213 const unsigned long do_sync = (bio->bi_opf & REQ_SYNC); 1207 const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
1214 int max_sectors; 1208 int max_sectors;
1215 int rdisk; 1209 int rdisk;
1210 bool print_msg = !!r1_bio;
1211 char b[BDEVNAME_SIZE];
1212
1213 /*
1214 * If r1_bio is set, we are blocking the raid1d thread
1215 * so there is a tiny risk of deadlock. So ask for
1216 * emergency memory if needed.
1217 */
1218 gfp_t gfp = r1_bio ? (GFP_NOIO | __GFP_HIGH) : GFP_NOIO;
1219
1220 if (print_msg) {
1221 /* Need to get the block device name carefully */
1222 struct md_rdev *rdev;
1223 rcu_read_lock();
1224 rdev = rcu_dereference(conf->mirrors[r1_bio->read_disk].rdev);
1225 if (rdev)
1226 bdevname(rdev->bdev, b);
1227 else
1228 strcpy(b, "???");
1229 rcu_read_unlock();
1230 }
1216 1231
1217 /* 1232 /*
1218 * Still need barrier for READ in case that whole 1233 * Still need barrier for READ in case that whole
@@ -1220,7 +1235,10 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
 	 */
 	wait_read_barrier(conf, bio->bi_iter.bi_sector);
 
-	r1_bio = alloc_r1bio(mddev, bio, 0);
+	if (!r1_bio)
+		r1_bio = alloc_r1bio(mddev, bio);
+	else
+		init_r1bio(r1_bio, mddev, bio);
 	r1_bio->sectors = max_read_sectors;
 
 	/*
@@ -1231,11 +1249,23 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
 
 	if (rdisk < 0) {
 		/* couldn't find anywhere to read from */
+		if (print_msg) {
+			pr_crit_ratelimited("md/raid1:%s: %s: unrecoverable I/O read error for block %llu\n",
+					    mdname(mddev),
+					    b,
+					    (unsigned long long)r1_bio->sector);
+		}
 		raid_end_bio_io(r1_bio);
 		return;
 	}
 	mirror = conf->mirrors + rdisk;
 
+	if (print_msg)
+		pr_info_ratelimited("md/raid1:%s: redirecting sector %llu to other mirror: %s\n",
+				    mdname(mddev),
+				    (unsigned long long)r1_bio->sector,
+				    bdevname(mirror->rdev->bdev, b));
+
 	if (test_bit(WriteMostly, &mirror->rdev->flags) &&
 	    bitmap) {
 		/*
@@ -1249,7 +1279,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
 
 	if (max_sectors < bio_sectors(bio)) {
 		struct bio *split = bio_split(bio, max_sectors,
-					      GFP_NOIO, conf->bio_split);
+					      gfp, conf->bio_split);
 		bio_chain(split, bio);
 		generic_make_request(bio);
 		bio = split;
@@ -1259,7 +1289,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
 
 	r1_bio->read_disk = rdisk;
 
-	read_bio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set);
+	read_bio = bio_clone_fast(bio, gfp, mddev->bio_set);
 
 	r1_bio->bios[rdisk] = read_bio;
 
@@ -1331,7 +1361,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 	}
 	wait_barrier(conf, bio->bi_iter.bi_sector);
 
-	r1_bio = alloc_r1bio(mddev, bio, 0);
+	r1_bio = alloc_r1bio(mddev, bio);
 	r1_bio->sectors = max_write_sectors;
 
 	if (conf->pending_count >= max_queued_requests) {
@@ -1551,7 +1581,7 @@ static void raid1_make_request(struct mddev *mddev, struct bio *bio)
 		bio->bi_iter.bi_sector, bio_sectors(bio));
 
 	if (bio_data_dir(bio) == READ)
-		raid1_read_request(mddev, bio, sectors);
+		raid1_read_request(mddev, bio, sectors, NULL);
 	else
 		raid1_write_request(mddev, bio, sectors);
 }
@@ -2443,11 +2473,8 @@ static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio)
 
 static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio)
 {
-	int disk;
-	int max_sectors;
 	struct mddev *mddev = conf->mddev;
 	struct bio *bio;
-	char b[BDEVNAME_SIZE];
 	struct md_rdev *rdev;
 	dev_t bio_dev;
 	sector_t bio_sector;
@@ -2463,7 +2490,6 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio)
 	 */
 
 	bio = r1_bio->bios[r1_bio->read_disk];
-	bdevname(bio->bi_bdev, b);
 	bio_dev = bio->bi_bdev->bd_dev;
 	bio_sector = conf->mirrors[r1_bio->read_disk].rdev->data_offset + r1_bio->sector;
 	bio_put(bio);
@@ -2481,58 +2507,12 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio)
 	}
 
 	rdev_dec_pending(rdev, conf->mddev);
+	allow_barrier(conf, r1_bio->sector);
+	bio = r1_bio->master_bio;
 
-read_more:
-	disk = read_balance(conf, r1_bio, &max_sectors);
-	if (disk == -1) {
-		pr_crit_ratelimited("md/raid1:%s: %s: unrecoverable I/O read error for block %llu\n",
-				    mdname(mddev), b, (unsigned long long)r1_bio->sector);
-		raid_end_bio_io(r1_bio);
-	} else {
-		const unsigned long do_sync
-			= r1_bio->master_bio->bi_opf & REQ_SYNC;
-		r1_bio->read_disk = disk;
-		bio = bio_clone_fast(r1_bio->master_bio, GFP_NOIO,
-				     mddev->bio_set);
-		bio_trim(bio, r1_bio->sector - bio->bi_iter.bi_sector,
-			 max_sectors);
-		r1_bio->bios[r1_bio->read_disk] = bio;
-		rdev = conf->mirrors[disk].rdev;
-		pr_info_ratelimited("md/raid1:%s: redirecting sector %llu to other mirror: %s\n",
-				    mdname(mddev),
-				    (unsigned long long)r1_bio->sector,
-				    bdevname(rdev->bdev, b));
-		bio->bi_iter.bi_sector = r1_bio->sector + rdev->data_offset;
-		bio->bi_bdev = rdev->bdev;
-		bio->bi_end_io = raid1_end_read_request;
-		bio_set_op_attrs(bio, REQ_OP_READ, do_sync);
-		if (test_bit(FailFast, &rdev->flags) &&
-		    test_bit(R1BIO_FailFast, &r1_bio->state))
-			bio->bi_opf |= MD_FAILFAST;
-		bio->bi_private = r1_bio;
-		if (max_sectors < r1_bio->sectors) {
-			/* Drat - have to split this up more */
-			struct bio *mbio = r1_bio->master_bio;
-			int sectors_handled = (r1_bio->sector + max_sectors
-					       - mbio->bi_iter.bi_sector);
-			r1_bio->sectors = max_sectors;
-			bio_inc_remaining(mbio);
-			trace_block_bio_remap(bdev_get_queue(bio->bi_bdev),
-					      bio, bio_dev, bio_sector);
-			generic_make_request(bio);
-			bio = NULL;
-
-			r1_bio = alloc_r1bio(mddev, mbio, sectors_handled);
-			set_bit(R1BIO_ReadError, &r1_bio->state);
-			inc_pending(conf, r1_bio->sector);
-
-			goto read_more;
-		} else {
-			trace_block_bio_remap(bdev_get_queue(bio->bi_bdev),
-					      bio, bio_dev, bio_sector);
-			generic_make_request(bio);
-		}
-	}
+	/* Reuse the old r1_bio so that the IO_BLOCKED settings are preserved */
+	r1_bio->state = 0;
+	raid1_read_request(mddev, bio, r1_bio->sectors, r1_bio);
 }
 
 static void raid1d(struct md_thread *thread)