author     NeilBrown <neilb@suse.com>   2017-04-05 00:05:50 -0400
committer  Shaohua Li <shli@fb.com>     2017-04-11 13:10:20 -0400
commit     689389a06ce79fdced85b5115717f71c71e623e0 (patch)
tree       beacf4e145c55913542aea4f83188bec206d9221 /drivers/md/raid1.c
parent     50512625da06c41517cb596f51b923ce15f401a4 (diff)
md/raid1: simplify handle_read_error().
handle_read_error() duplicates a lot of the work that raid1_read_request()
does, so it makes sense to just use that function.
This doesn't quite work as handle_read_error() relies on the same r1bio
being re-used so that, in the case of a read-only array, setting
IO_BLOCKED in r1bio->bios[] ensures read_balance() won't re-use
that device.
So we need to allow an r1bio to be passed to raid1_read_request(), and to
have that function mostly initialise the r1bio, but leave the bios[]
array untouched.
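
(For context: IO_BLOCKED is a sentinel pointer value from drivers/md/raid1.h,
and read_balance() skips any disk whose bios[] slot holds it. The toy
user-space model below illustrates just that skip logic; read_balance_model()
and the two-disk setup are invented for illustration and are not the kernel
code.)

    #include <stdio.h>

    struct bio;                              /* opaque in this model */
    #define IO_BLOCKED ((struct bio *)1)     /* sentinel, as in raid1.h */

    /* Toy version of the skip logic: pick the first disk whose
     * bios[] slot was not marked IO_BLOCKED by an earlier failure. */
    static int read_balance_model(struct bio **bios, int raid_disks)
    {
            int disk;

            for (disk = 0; disk < raid_disks; disk++) {
                    if (bios[disk] == IO_BLOCKED)  /* already failed; skip */
                            continue;
                    return disk;
            }
            return -1;                             /* unrecoverable read */
    }

    int main(void)
    {
            struct bio *bios[2] = { NULL, NULL };  /* zeroed slots */

            bios[0] = IO_BLOCKED;  /* the read from disk 0 failed */
            printf("retry goes to disk %d\n", read_balance_model(bios, 2));
            return 0;
    }

This is why the retry path must re-use the old r1bio: a freshly allocated
one would lose the IO_BLOCKED marks.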
The two parts of handle_read_error() that need to be preserved are the
warning messages it prints, so these are conditionally added to
raid1_read_request().
Note that this highlights a minor bug in alloc_r1bio(): it doesn't
initialise the bios[] array, so old content may still be there, which
might cause read_balance() to ignore some devices for no good reason.
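
The fix is visible in alloc_r1bio() in the diff below: because
mempool_alloc() can hand back a recycled r1bio whose bios[] still holds
stale pointers, the array is now cleared right after allocation:

    r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
    /* Ensure no bio records IO_BLOCKED */
    memset(r1_bio->bios, 0, conf->raid_disks * sizeof(r1_bio->bios[0]));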
With this change, we no longer need inc_pending(), or the sectors_handled
arg to alloc_r1bio().
As handle_read_error() is called from raid1d() and allocates memory,
there is a tiny chance of a deadlock: all elements of the various pools
could be queued waiting for raid1d to handle them, and there may be no
free memory left.
Achieving guaranteed forward progress would probably require a second
thread and another mempool. Instead of that complexity, add
__GFP_HIGH to any allocations made when raid1_read_request() is called
from raid1d.
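
Concretely, raid1_read_request() now derives its allocation flags from
whether a pre-existing r1_bio was passed in, which only happens on the
raid1d path (excerpt from the diff below):

    /*
     * If r1_bio is set, we are blocking the raid1d thread
     * so there is a tiny risk of deadlock.  So ask for
     * emergency memory if needed.
     */
    gfp_t gfp = r1_bio ? (GFP_NOIO | __GFP_HIGH) : GFP_NOIO;

__GFP_HIGH only lets these allocations dip into emergency reserves; it
shrinks the deadlock window rather than eliminating it, which is the
trade-off accepted here.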
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
Diffstat (limited to 'drivers/md/raid1.c')
-rw-r--r--  drivers/md/raid1.c  140
1 file changed, 60 insertions(+), 80 deletions(-)
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 18af00c86b42..29a9aa9254c3 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -988,16 +988,6 @@ static void wait_read_barrier(struct r1conf *conf, sector_t sector_nr)
         spin_unlock_irq(&conf->resync_lock);
 }
 
-static void inc_pending(struct r1conf *conf, sector_t bi_sector)
-{
-        /* The current request requires multiple r1_bio, so
-         * we need to increment the pending count, and the corresponding
-         * window count.
-         */
-        int idx = sector_to_idx(bi_sector);
-        atomic_inc(&conf->nr_pending[idx]);
-}
-
 static void wait_barrier(struct r1conf *conf, sector_t sector_nr)
 {
         int idx = sector_to_idx(sector_nr);
@@ -1184,35 +1174,60 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
         kfree(plug);
 }
 
+static void init_r1bio(struct r1bio *r1_bio, struct mddev *mddev, struct bio *bio)
+{
+        r1_bio->master_bio = bio;
+        r1_bio->sectors = bio_sectors(bio);
+        r1_bio->state = 0;
+        r1_bio->mddev = mddev;
+        r1_bio->sector = bio->bi_iter.bi_sector;
+}
+
 static inline struct r1bio *
-alloc_r1bio(struct mddev *mddev, struct bio *bio, sector_t sectors_handled)
+alloc_r1bio(struct mddev *mddev, struct bio *bio)
 {
         struct r1conf *conf = mddev->private;
         struct r1bio *r1_bio;
 
         r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
-
-        r1_bio->master_bio = bio;
-        r1_bio->sectors = bio_sectors(bio) - sectors_handled;
-        r1_bio->state = 0;
-        r1_bio->mddev = mddev;
-        r1_bio->sector = bio->bi_iter.bi_sector + sectors_handled;
-
+        /* Ensure no bio records IO_BLOCKED */
+        memset(r1_bio->bios, 0, conf->raid_disks * sizeof(r1_bio->bios[0]));
+        init_r1bio(r1_bio, mddev, bio);
         return r1_bio;
 }
 
 static void raid1_read_request(struct mddev *mddev, struct bio *bio,
-                               int max_read_sectors)
+                               int max_read_sectors, struct r1bio *r1_bio)
 {
         struct r1conf *conf = mddev->private;
         struct raid1_info *mirror;
-        struct r1bio *r1_bio;
         struct bio *read_bio;
         struct bitmap *bitmap = mddev->bitmap;
         const int op = bio_op(bio);
         const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
         int max_sectors;
         int rdisk;
+        bool print_msg = !!r1_bio;
+        char b[BDEVNAME_SIZE];
+
+        /*
+         * If r1_bio is set, we are blocking the raid1d thread
+         * so there is a tiny risk of deadlock.  So ask for
+         * emergency memory if needed.
+         */
+        gfp_t gfp = r1_bio ? (GFP_NOIO | __GFP_HIGH) : GFP_NOIO;
+
+        if (print_msg) {
+                /* Need to get the block device name carefully */
+                struct md_rdev *rdev;
+                rcu_read_lock();
+                rdev = rcu_dereference(conf->mirrors[r1_bio->read_disk].rdev);
+                if (rdev)
+                        bdevname(rdev->bdev, b);
+                else
+                        strcpy(b, "???");
+                rcu_read_unlock();
+        }
 
         /*
          * Still need barrier for READ in case that whole
@@ -1220,7 +1235,10 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
          */
         wait_read_barrier(conf, bio->bi_iter.bi_sector);
 
-        r1_bio = alloc_r1bio(mddev, bio, 0);
+        if (!r1_bio)
+                r1_bio = alloc_r1bio(mddev, bio);
+        else
+                init_r1bio(r1_bio, mddev, bio);
         r1_bio->sectors = max_read_sectors;
 
         /*
@@ -1231,11 +1249,23 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
 
         if (rdisk < 0) {
                 /* couldn't find anywhere to read from */
+                if (print_msg) {
+                        pr_crit_ratelimited("md/raid1:%s: %s: unrecoverable I/O read error for block %llu\n",
+                                            mdname(mddev),
+                                            b,
+                                            (unsigned long long)r1_bio->sector);
+                }
                 raid_end_bio_io(r1_bio);
                 return;
         }
         mirror = conf->mirrors + rdisk;
 
+        if (print_msg)
+                pr_info_ratelimited("md/raid1:%s: redirecting sector %llu to other mirror: %s\n",
+                                    mdname(mddev),
+                                    (unsigned long long)r1_bio->sector,
+                                    bdevname(mirror->rdev->bdev, b));
+
         if (test_bit(WriteMostly, &mirror->rdev->flags) &&
             bitmap) {
                 /*
@@ -1249,7 +1279,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
 
         if (max_sectors < bio_sectors(bio)) {
                 struct bio *split = bio_split(bio, max_sectors,
-                                              GFP_NOIO, conf->bio_split);
+                                              gfp, conf->bio_split);
                 bio_chain(split, bio);
                 generic_make_request(bio);
                 bio = split;
@@ -1259,7 +1289,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
 
         r1_bio->read_disk = rdisk;
 
-        read_bio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set);
+        read_bio = bio_clone_fast(bio, gfp, mddev->bio_set);
 
         r1_bio->bios[rdisk] = read_bio;
 
@@ -1331,7 +1361,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
         }
         wait_barrier(conf, bio->bi_iter.bi_sector);
 
-        r1_bio = alloc_r1bio(mddev, bio, 0);
+        r1_bio = alloc_r1bio(mddev, bio);
         r1_bio->sectors = max_write_sectors;
 
         if (conf->pending_count >= max_queued_requests) {
@@ -1551,7 +1581,7 @@ static void raid1_make_request(struct mddev *mddev, struct bio *bio)
                 bio->bi_iter.bi_sector, bio_sectors(bio));
 
         if (bio_data_dir(bio) == READ)
-                raid1_read_request(mddev, bio, sectors);
+                raid1_read_request(mddev, bio, sectors, NULL);
         else
                 raid1_write_request(mddev, bio, sectors);
 }
@@ -2443,11 +2473,8 @@ static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio)
 
 static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio)
 {
-        int disk;
-        int max_sectors;
         struct mddev *mddev = conf->mddev;
         struct bio *bio;
-        char b[BDEVNAME_SIZE];
         struct md_rdev *rdev;
         dev_t bio_dev;
         sector_t bio_sector;
@@ -2463,7 +2490,6 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio)
          */
 
         bio = r1_bio->bios[r1_bio->read_disk];
-        bdevname(bio->bi_bdev, b);
         bio_dev = bio->bi_bdev->bd_dev;
         bio_sector = conf->mirrors[r1_bio->read_disk].rdev->data_offset + r1_bio->sector;
         bio_put(bio);
@@ -2481,58 +2507,12 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio)
         }
 
         rdev_dec_pending(rdev, conf->mddev);
+        allow_barrier(conf, r1_bio->sector);
+        bio = r1_bio->master_bio;
 
-read_more:
-        disk = read_balance(conf, r1_bio, &max_sectors);
-        if (disk == -1) {
-                pr_crit_ratelimited("md/raid1:%s: %s: unrecoverable I/O read error for block %llu\n",
-                                    mdname(mddev), b, (unsigned long long)r1_bio->sector);
-                raid_end_bio_io(r1_bio);
-        } else {
-                const unsigned long do_sync
-                        = r1_bio->master_bio->bi_opf & REQ_SYNC;
-                r1_bio->read_disk = disk;
-                bio = bio_clone_fast(r1_bio->master_bio, GFP_NOIO,
-                                     mddev->bio_set);
-                bio_trim(bio, r1_bio->sector - bio->bi_iter.bi_sector,
-                         max_sectors);
-                r1_bio->bios[r1_bio->read_disk] = bio;
-                rdev = conf->mirrors[disk].rdev;
-                pr_info_ratelimited("md/raid1:%s: redirecting sector %llu to other mirror: %s\n",
-                                    mdname(mddev),
-                                    (unsigned long long)r1_bio->sector,
-                                    bdevname(rdev->bdev, b));
-                bio->bi_iter.bi_sector = r1_bio->sector + rdev->data_offset;
-                bio->bi_bdev = rdev->bdev;
-                bio->bi_end_io = raid1_end_read_request;
-                bio_set_op_attrs(bio, REQ_OP_READ, do_sync);
-                if (test_bit(FailFast, &rdev->flags) &&
-                    test_bit(R1BIO_FailFast, &r1_bio->state))
-                        bio->bi_opf |= MD_FAILFAST;
-                bio->bi_private = r1_bio;
-                if (max_sectors < r1_bio->sectors) {
-                        /* Drat - have to split this up more */
-                        struct bio *mbio = r1_bio->master_bio;
-                        int sectors_handled = (r1_bio->sector + max_sectors
-                                               - mbio->bi_iter.bi_sector);
-                        r1_bio->sectors = max_sectors;
-                        bio_inc_remaining(mbio);
-                        trace_block_bio_remap(bdev_get_queue(bio->bi_bdev),
-                                              bio, bio_dev, bio_sector);
-                        generic_make_request(bio);
-                        bio = NULL;
-
-                        r1_bio = alloc_r1bio(mddev, mbio, sectors_handled);
-                        set_bit(R1BIO_ReadError, &r1_bio->state);
-                        inc_pending(conf, r1_bio->sector);
-
-                        goto read_more;
-                } else {
-                        trace_block_bio_remap(bdev_get_queue(bio->bi_bdev),
-                                              bio, bio_dev, bio_sector);
-                        generic_make_request(bio);
-                }
-        }
+        /* Reuse the old r1_bio so that the IO_BLOCKED settings are preserved */
+        r1_bio->state = 0;
+        raid1_read_request(mddev, bio, r1_bio->sectors, r1_bio);
 }
 
 static void raid1d(struct md_thread *thread)