author     Robert LeBlanc <robert@leblancnet.us>    2016-12-05 15:02:58 -0500
committer  Shaohua Li <shli@fb.com>                 2017-01-03 11:56:52 -0500
commit     bb5f1ed70bc3bbbce510907da3432dab267ff508
tree       82fc6fc28ca0d4a8eecb85ab6e84309416f894e1
parent     3b046a97cbd35a73e1eef968dbfb1a0aac745a77
md/raid10: Refactor raid10_make_request
Refactor raid10_make_request into separate read and write functions to
clean up the code.
Shaohua: add the recovery check back to read path
Signed-off-by: Robert LeBlanc <robert@leblancnet.us>
Signed-off-by: Shaohua Li <shli@fb.com>
-rw-r--r--   drivers/md/raid10.c   245
1 file changed, 140 insertions, 105 deletions
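In outline, the patch replaces the single combined request path with two helpers and a thin dispatcher. The sketch below is condensed from the diff that follows (the r10_bio setup, reshape waits, and bad-block bookkeeping are elided), not a verbatim excerpt:

        /* Condensed sketch of the new structure; see the full diff below. */
        static void raid10_read_request(struct mddev *mddev, struct bio *bio,
                                        struct r10bio *r10_bio);
        static void raid10_write_request(struct mddev *mddev, struct bio *bio,
                                         struct r10bio *r10_bio);

        static void __make_request(struct mddev *mddev, struct bio *bio)
        {
                struct r10conf *conf = mddev->private;
                struct r10bio *r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);

                /* ... initialize r10_bio (master_bio, sectors, sector, state) ... */

                if (bio_data_dir(bio) == READ)
                        raid10_read_request(mddev, bio, r10_bio);
                else
                        raid10_write_request(mddev, bio, r10_bio);
        }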
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index ab5e86209322..1920756828df 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1087,23 +1087,122 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
         kfree(plug);
 }
 
-static void __make_request(struct mddev *mddev, struct bio *bio)
+static void raid10_read_request(struct mddev *mddev, struct bio *bio,
+                                struct r10bio *r10_bio)
 {
         struct r10conf *conf = mddev->private;
-        struct r10bio *r10_bio;
         struct bio *read_bio;
+        const int op = bio_op(bio);
+        const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
+        int sectors_handled;
+        int max_sectors;
+        sector_t sectors;
+        struct md_rdev *rdev;
+        int slot;
+
+        /*
+         * Register the new request and wait if the reconstruction
+         * thread has put up a bar for new requests.
+         * Continue immediately if no resync is active currently.
+         */
+        wait_barrier(conf);
+
+        sectors = bio_sectors(bio);
+        while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
+            bio->bi_iter.bi_sector < conf->reshape_progress &&
+            bio->bi_iter.bi_sector + sectors > conf->reshape_progress) {
+                /*
+                 * IO spans the reshape position. Need to wait for reshape to
+                 * pass
+                 */
+                raid10_log(conf->mddev, "wait reshape");
+                allow_barrier(conf);
+                wait_event(conf->wait_barrier,
+                           conf->reshape_progress <= bio->bi_iter.bi_sector ||
+                           conf->reshape_progress >= bio->bi_iter.bi_sector +
+                           sectors);
+                wait_barrier(conf);
+        }
+
+read_again:
+        rdev = read_balance(conf, r10_bio, &max_sectors);
+        if (!rdev) {
+                raid_end_bio_io(r10_bio);
+                return;
+        }
+        slot = r10_bio->read_slot;
+
+        read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+        bio_trim(read_bio, r10_bio->sector - bio->bi_iter.bi_sector,
+                 max_sectors);
+
+        r10_bio->devs[slot].bio = read_bio;
+        r10_bio->devs[slot].rdev = rdev;
+
+        read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr +
+                choose_data_offset(r10_bio, rdev);
+        read_bio->bi_bdev = rdev->bdev;
+        read_bio->bi_end_io = raid10_end_read_request;
+        bio_set_op_attrs(read_bio, op, do_sync);
+        if (test_bit(FailFast, &rdev->flags) &&
+            test_bit(R10BIO_FailFast, &r10_bio->state))
+                read_bio->bi_opf |= MD_FAILFAST;
+        read_bio->bi_private = r10_bio;
+
+        if (mddev->gendisk)
+                trace_block_bio_remap(bdev_get_queue(read_bio->bi_bdev),
+                                      read_bio, disk_devt(mddev->gendisk),
+                                      r10_bio->sector);
+        if (max_sectors < r10_bio->sectors) {
+                /*
+                 * Could not read all from this device, so we will need another
+                 * r10_bio.
+                 */
+                sectors_handled = (r10_bio->sector + max_sectors
+                                   - bio->bi_iter.bi_sector);
+                r10_bio->sectors = max_sectors;
+                spin_lock_irq(&conf->device_lock);
+                if (bio->bi_phys_segments == 0)
+                        bio->bi_phys_segments = 2;
+                else
+                        bio->bi_phys_segments++;
+                spin_unlock_irq(&conf->device_lock);
+                /*
+                 * Cannot call generic_make_request directly as that will be
+                 * queued in __generic_make_request and subsequent
+                 * mempool_alloc might block waiting for it. so hand bio over
+                 * to raid10d.
+                 */
+                reschedule_retry(r10_bio);
+
+                r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
+
+                r10_bio->master_bio = bio;
+                r10_bio->sectors = bio_sectors(bio) - sectors_handled;
+                r10_bio->state = 0;
+                r10_bio->mddev = mddev;
+                r10_bio->sector = bio->bi_iter.bi_sector + sectors_handled;
+                goto read_again;
+        } else
+                generic_make_request(read_bio);
+        return;
+}
+
+static void raid10_write_request(struct mddev *mddev, struct bio *bio,
+                                 struct r10bio *r10_bio)
+{
+        struct r10conf *conf = mddev->private;
         int i;
         const int op = bio_op(bio);
-        const int rw = bio_data_dir(bio);
         const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
         const unsigned long do_fua = (bio->bi_opf & REQ_FUA);
         unsigned long flags;
         struct md_rdev *blocked_rdev;
         struct blk_plug_cb *cb;
         struct raid10_plug_cb *plug = NULL;
+        sector_t sectors;
         int sectors_handled;
         int max_sectors;
-        int sectors;
 
         md_write_start(mddev, bio);
 
@@ -1118,8 +1217,9 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
         while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
             bio->bi_iter.bi_sector < conf->reshape_progress &&
             bio->bi_iter.bi_sector + sectors > conf->reshape_progress) {
-                /* IO spans the reshape position. Need to wait for
-                 * reshape to pass
+                /*
+                 * IO spans the reshape position. Need to wait for reshape to
+                 * pass
                  */
                 raid10_log(conf->mddev, "wait reshape");
                 allow_barrier(conf);
@@ -1129,8 +1229,8 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
                            sectors);
                 wait_barrier(conf);
         }
+
         if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
-            bio_data_dir(bio) == WRITE &&
             (mddev->reshape_backwards
              ? (bio->bi_iter.bi_sector < conf->reshape_safe &&
                 bio->bi_iter.bi_sector + sectors > conf->reshape_progress)
@@ -1148,98 +1248,6 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
                 conf->reshape_safe = mddev->reshape_position;
         }
 
-        r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
-
-        r10_bio->master_bio = bio;
-        r10_bio->sectors = sectors;
-
-        r10_bio->mddev = mddev;
-        r10_bio->sector = bio->bi_iter.bi_sector;
-        r10_bio->state = 0;
-
-        /* We might need to issue multiple reads to different
-         * devices if there are bad blocks around, so we keep
-         * track of the number of reads in bio->bi_phys_segments.
-         * If this is 0, there is only one r10_bio and no locking
-         * will be needed when the request completes. If it is
-         * non-zero, then it is the number of not-completed requests.
-         */
-        bio->bi_phys_segments = 0;
-        bio_clear_flag(bio, BIO_SEG_VALID);
-
-        if (rw == READ) {
-                /*
-                 * read balancing logic:
-                 */
-                struct md_rdev *rdev;
-                int slot;
-
-read_again:
-                rdev = read_balance(conf, r10_bio, &max_sectors);
-                if (!rdev) {
-                        raid_end_bio_io(r10_bio);
-                        return;
-                }
-                slot = r10_bio->read_slot;
-
-                read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-                bio_trim(read_bio, r10_bio->sector - bio->bi_iter.bi_sector,
-                         max_sectors);
-
-                r10_bio->devs[slot].bio = read_bio;
-                r10_bio->devs[slot].rdev = rdev;
-
-                read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr +
-                        choose_data_offset(r10_bio, rdev);
-                read_bio->bi_bdev = rdev->bdev;
-                read_bio->bi_end_io = raid10_end_read_request;
-                bio_set_op_attrs(read_bio, op, do_sync);
-                if (test_bit(FailFast, &rdev->flags) &&
-                    test_bit(R10BIO_FailFast, &r10_bio->state))
-                        read_bio->bi_opf |= MD_FAILFAST;
-                read_bio->bi_private = r10_bio;
-
-                if (mddev->gendisk)
-                        trace_block_bio_remap(bdev_get_queue(read_bio->bi_bdev),
-                                              read_bio, disk_devt(mddev->gendisk),
-                                              r10_bio->sector);
-                if (max_sectors < r10_bio->sectors) {
-                        /* Could not read all from this device, so we will
-                         * need another r10_bio.
-                         */
-                        sectors_handled = (r10_bio->sector + max_sectors
-                                           - bio->bi_iter.bi_sector);
-                        r10_bio->sectors = max_sectors;
-                        spin_lock_irq(&conf->device_lock);
-                        if (bio->bi_phys_segments == 0)
-                                bio->bi_phys_segments = 2;
-                        else
-                                bio->bi_phys_segments++;
-                        spin_unlock_irq(&conf->device_lock);
-                        /* Cannot call generic_make_request directly
-                         * as that will be queued in __generic_make_request
-                         * and subsequent mempool_alloc might block
-                         * waiting for it. so hand bio over to raid10d.
-                         */
-                        reschedule_retry(r10_bio);
-
-                        r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
-
-                        r10_bio->master_bio = bio;
-                        r10_bio->sectors = bio_sectors(bio) - sectors_handled;
-                        r10_bio->state = 0;
-                        r10_bio->mddev = mddev;
-                        r10_bio->sector = bio->bi_iter.bi_sector +
-                                sectors_handled;
-                        goto read_again;
-                } else
-                        generic_make_request(read_bio);
-                return;
-        }
-
-        /*
-         * WRITE:
-         */
         if (conf->pending_count >= max_queued_requests) {
                 md_wakeup_thread(mddev->thread);
                 raid10_log(mddev, "wait queued");
@@ -1300,8 +1308,7 @@ retry_write:
                         int bad_sectors;
                         int is_bad;
 
-                        is_bad = is_badblock(rdev, dev_sector,
-                                             max_sectors,
+                        is_bad = is_badblock(rdev, dev_sector, max_sectors,
                                              &first_bad, &bad_sectors);
                         if (is_bad < 0) {
                                 /* Mustn't write here until the bad block
@@ -1405,8 +1412,7 @@ retry_write:
                         r10_bio->devs[i].bio = mbio;
 
                         mbio->bi_iter.bi_sector = (r10_bio->devs[i].addr+
-                                           choose_data_offset(r10_bio,
-                                                              rdev));
+                                           choose_data_offset(r10_bio, rdev));
                         mbio->bi_bdev = rdev->bdev;
                         mbio->bi_end_io = raid10_end_write_request;
                         bio_set_op_attrs(mbio, op, do_sync | do_fua);
@@ -1457,8 +1463,7 @@ retry_write:
                         r10_bio->devs[i].repl_bio = mbio;
 
                         mbio->bi_iter.bi_sector = (r10_bio->devs[i].addr +
-                                           choose_data_offset(
-                                                r10_bio, rdev));
+                                           choose_data_offset(r10_bio, rdev));
                         mbio->bi_bdev = rdev->bdev;
                         mbio->bi_end_io = raid10_end_write_request;
                         bio_set_op_attrs(mbio, op, do_sync | do_fua);
@@ -1503,6 +1508,36 @@ retry_write:
         one_write_done(r10_bio);
 }
 
+static void __make_request(struct mddev *mddev, struct bio *bio)
+{
+        struct r10conf *conf = mddev->private;
+        struct r10bio *r10_bio;
+
+        r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
+
+        r10_bio->master_bio = bio;
+        r10_bio->sectors = bio_sectors(bio);
+
+        r10_bio->mddev = mddev;
+        r10_bio->sector = bio->bi_iter.bi_sector;
+        r10_bio->state = 0;
+
+        /*
+         * We might need to issue multiple reads to different devices if there
+         * are bad blocks around, so we keep track of the number of reads in
+         * bio->bi_phys_segments. If this is 0, there is only one r10_bio and
+         * no locking will be needed when the request completes. If it is
+         * non-zero, then it is the number of not-completed requests.
+         */
+        bio->bi_phys_segments = 0;
+        bio_clear_flag(bio, BIO_SEG_VALID);
+
+        if (bio_data_dir(bio) == READ)
+                raid10_read_request(mddev, bio, r10_bio);
+        else
+                raid10_write_request(mddev, bio, r10_bio);
+}
+
 static void raid10_make_request(struct mddev *mddev, struct bio *bio)
 {
         struct r10conf *conf = mddev->private;