author     Robert LeBlanc <robert@leblancnet.us>	2016-12-05 15:02:58 -0500
committer  Shaohua Li <shli@fb.com>	2017-01-03 11:56:52 -0500
commit     bb5f1ed70bc3bbbce510907da3432dab267ff508
tree       82fc6fc28ca0d4a8eecb85ab6e84309416f894e1
parent     3b046a97cbd35a73e1eef968dbfb1a0aac745a77

md/raid10: Refactor raid10_make_request

Refactor raid10_make_request into separate read and write functions to
clean up the code.

Shaohua: add the recovery check back to the read path.

Signed-off-by: Robert LeBlanc <robert@leblancnet.us>
Signed-off-by: Shaohua Li <shli@fb.com>

-rw-r--r--	drivers/md/raid10.c	245
1 file changed, 140 insertions, 105 deletions
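
After the refactor, __make_request is reduced to a dispatcher: it allocates and initializes the r10_bio, then routes the bio to the read or write helper based on its data direction. The sketch below condenses that control flow from the hunks that follow (the bi_phys_segments bookkeeping, shown in the final hunk, is omitted here for brevity):

static void __make_request(struct mddev *mddev, struct bio *bio)
{
	struct r10conf *conf = mddev->private;
	struct r10bio *r10_bio;

	/* One r10_bio describes the whole request; the helpers may split it. */
	r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
	r10_bio->master_bio = bio;
	r10_bio->sectors = bio_sectors(bio);
	r10_bio->mddev = mddev;
	r10_bio->sector = bio->bi_iter.bi_sector;
	r10_bio->state = 0;

	/* Dispatch on data direction; both paths were previously inlined here. */
	if (bio_data_dir(bio) == READ)
		raid10_read_request(mddev, bio, r10_bio);
	else
		raid10_write_request(mddev, bio, r10_bio);
}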
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index ab5e86209322..1920756828df 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1087,23 +1087,122 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
 	kfree(plug);
 }
 
-static void __make_request(struct mddev *mddev, struct bio *bio)
+static void raid10_read_request(struct mddev *mddev, struct bio *bio,
+				struct r10bio *r10_bio)
 {
 	struct r10conf *conf = mddev->private;
-	struct r10bio *r10_bio;
 	struct bio *read_bio;
+	const int op = bio_op(bio);
+	const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
+	int sectors_handled;
+	int max_sectors;
+	sector_t sectors;
+	struct md_rdev *rdev;
+	int slot;
+
+	/*
+	 * Register the new request and wait if the reconstruction
+	 * thread has put up a bar for new requests.
+	 * Continue immediately if no resync is active currently.
+	 */
+	wait_barrier(conf);
+
+	sectors = bio_sectors(bio);
+	while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
+	    bio->bi_iter.bi_sector < conf->reshape_progress &&
+	    bio->bi_iter.bi_sector + sectors > conf->reshape_progress) {
+		/*
+		 * IO spans the reshape position. Need to wait for reshape to
+		 * pass
+		 */
+		raid10_log(conf->mddev, "wait reshape");
+		allow_barrier(conf);
+		wait_event(conf->wait_barrier,
+			   conf->reshape_progress <= bio->bi_iter.bi_sector ||
+			   conf->reshape_progress >= bio->bi_iter.bi_sector +
+			   sectors);
+		wait_barrier(conf);
+	}
+
+read_again:
+	rdev = read_balance(conf, r10_bio, &max_sectors);
+	if (!rdev) {
+		raid_end_bio_io(r10_bio);
+		return;
+	}
+	slot = r10_bio->read_slot;
+
+	read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+	bio_trim(read_bio, r10_bio->sector - bio->bi_iter.bi_sector,
+		 max_sectors);
+
+	r10_bio->devs[slot].bio = read_bio;
+	r10_bio->devs[slot].rdev = rdev;
+
+	read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr +
+		choose_data_offset(r10_bio, rdev);
+	read_bio->bi_bdev = rdev->bdev;
+	read_bio->bi_end_io = raid10_end_read_request;
+	bio_set_op_attrs(read_bio, op, do_sync);
+	if (test_bit(FailFast, &rdev->flags) &&
+	    test_bit(R10BIO_FailFast, &r10_bio->state))
+		read_bio->bi_opf |= MD_FAILFAST;
+	read_bio->bi_private = r10_bio;
+
+	if (mddev->gendisk)
+		trace_block_bio_remap(bdev_get_queue(read_bio->bi_bdev),
+				      read_bio, disk_devt(mddev->gendisk),
+				      r10_bio->sector);
+	if (max_sectors < r10_bio->sectors) {
+		/*
+		 * Could not read all from this device, so we will need another
+		 * r10_bio.
+		 */
+		sectors_handled = (r10_bio->sector + max_sectors
+				   - bio->bi_iter.bi_sector);
+		r10_bio->sectors = max_sectors;
+		spin_lock_irq(&conf->device_lock);
+		if (bio->bi_phys_segments == 0)
+			bio->bi_phys_segments = 2;
+		else
+			bio->bi_phys_segments++;
+		spin_unlock_irq(&conf->device_lock);
+		/*
+		 * Cannot call generic_make_request directly as that will be
+		 * queued in __generic_make_request and subsequent
+		 * mempool_alloc might block waiting for it. so hand bio over
+		 * to raid10d.
+		 */
+		reschedule_retry(r10_bio);
+
+		r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
+
+		r10_bio->master_bio = bio;
+		r10_bio->sectors = bio_sectors(bio) - sectors_handled;
+		r10_bio->state = 0;
+		r10_bio->mddev = mddev;
+		r10_bio->sector = bio->bi_iter.bi_sector + sectors_handled;
+		goto read_again;
+	} else
+		generic_make_request(read_bio);
+	return;
+}
+
+static void raid10_write_request(struct mddev *mddev, struct bio *bio,
+				 struct r10bio *r10_bio)
+{
+	struct r10conf *conf = mddev->private;
 	int i;
 	const int op = bio_op(bio);
-	const int rw = bio_data_dir(bio);
 	const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
 	const unsigned long do_fua = (bio->bi_opf & REQ_FUA);
 	unsigned long flags;
 	struct md_rdev *blocked_rdev;
 	struct blk_plug_cb *cb;
 	struct raid10_plug_cb *plug = NULL;
+	sector_t sectors;
 	int sectors_handled;
 	int max_sectors;
-	int sectors;
 
 	md_write_start(mddev, bio);
 
@@ -1118,8 +1217,9 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
 	while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
 	    bio->bi_iter.bi_sector < conf->reshape_progress &&
 	    bio->bi_iter.bi_sector + sectors > conf->reshape_progress) {
-		/* IO spans the reshape position. Need to wait for
-		 * reshape to pass
+		/*
+		 * IO spans the reshape position. Need to wait for reshape to
+		 * pass
 		 */
 		raid10_log(conf->mddev, "wait reshape");
 		allow_barrier(conf);
@@ -1129,8 +1229,8 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
 			   sectors);
 		wait_barrier(conf);
 	}
+
 	if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
-	    bio_data_dir(bio) == WRITE &&
 	    (mddev->reshape_backwards
 	     ? (bio->bi_iter.bi_sector < conf->reshape_safe &&
 		bio->bi_iter.bi_sector + sectors > conf->reshape_progress)
@@ -1148,98 +1248,6 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
 		conf->reshape_safe = mddev->reshape_position;
 	}
 
-	r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
-
-	r10_bio->master_bio = bio;
-	r10_bio->sectors = sectors;
-
-	r10_bio->mddev = mddev;
-	r10_bio->sector = bio->bi_iter.bi_sector;
-	r10_bio->state = 0;
-
-	/* We might need to issue multiple reads to different
-	 * devices if there are bad blocks around, so we keep
-	 * track of the number of reads in bio->bi_phys_segments.
-	 * If this is 0, there is only one r10_bio and no locking
-	 * will be needed when the request completes. If it is
-	 * non-zero, then it is the number of not-completed requests.
-	 */
-	bio->bi_phys_segments = 0;
-	bio_clear_flag(bio, BIO_SEG_VALID);
-
-	if (rw == READ) {
-		/*
-		 * read balancing logic:
-		 */
-		struct md_rdev *rdev;
-		int slot;
-
-read_again:
-		rdev = read_balance(conf, r10_bio, &max_sectors);
-		if (!rdev) {
-			raid_end_bio_io(r10_bio);
-			return;
-		}
-		slot = r10_bio->read_slot;
-
-		read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-		bio_trim(read_bio, r10_bio->sector - bio->bi_iter.bi_sector,
-			 max_sectors);
-
-		r10_bio->devs[slot].bio = read_bio;
-		r10_bio->devs[slot].rdev = rdev;
-
-		read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr +
-			choose_data_offset(r10_bio, rdev);
-		read_bio->bi_bdev = rdev->bdev;
-		read_bio->bi_end_io = raid10_end_read_request;
-		bio_set_op_attrs(read_bio, op, do_sync);
-		if (test_bit(FailFast, &rdev->flags) &&
-		    test_bit(R10BIO_FailFast, &r10_bio->state))
-			read_bio->bi_opf |= MD_FAILFAST;
-		read_bio->bi_private = r10_bio;
-
-		if (mddev->gendisk)
-			trace_block_bio_remap(bdev_get_queue(read_bio->bi_bdev),
-					      read_bio, disk_devt(mddev->gendisk),
-					      r10_bio->sector);
-		if (max_sectors < r10_bio->sectors) {
-			/* Could not read all from this device, so we will
-			 * need another r10_bio.
-			 */
-			sectors_handled = (r10_bio->sector + max_sectors
-					   - bio->bi_iter.bi_sector);
-			r10_bio->sectors = max_sectors;
-			spin_lock_irq(&conf->device_lock);
-			if (bio->bi_phys_segments == 0)
-				bio->bi_phys_segments = 2;
-			else
-				bio->bi_phys_segments++;
-			spin_unlock_irq(&conf->device_lock);
-			/* Cannot call generic_make_request directly
-			 * as that will be queued in __generic_make_request
-			 * and subsequent mempool_alloc might block
-			 * waiting for it. so hand bio over to raid10d.
-			 */
-			reschedule_retry(r10_bio);
-
-			r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
-
-			r10_bio->master_bio = bio;
-			r10_bio->sectors = bio_sectors(bio) - sectors_handled;
-			r10_bio->state = 0;
-			r10_bio->mddev = mddev;
-			r10_bio->sector = bio->bi_iter.bi_sector +
-				sectors_handled;
-			goto read_again;
-		} else
-			generic_make_request(read_bio);
-		return;
-	}
-
-	/*
-	 * WRITE:
-	 */
 	if (conf->pending_count >= max_queued_requests) {
 		md_wakeup_thread(mddev->thread);
 		raid10_log(mddev, "wait queued");
@@ -1300,8 +1308,7 @@ retry_write:
 			int bad_sectors;
 			int is_bad;
 
-			is_bad = is_badblock(rdev, dev_sector,
-					     max_sectors,
+			is_bad = is_badblock(rdev, dev_sector, max_sectors,
 					     &first_bad, &bad_sectors);
 			if (is_bad < 0) {
 				/* Mustn't write here until the bad block
@@ -1405,8 +1412,7 @@ retry_write:
 			r10_bio->devs[i].bio = mbio;
 
 			mbio->bi_iter.bi_sector = (r10_bio->devs[i].addr+
-					   choose_data_offset(r10_bio,
-							      rdev));
+					   choose_data_offset(r10_bio, rdev));
 			mbio->bi_bdev = rdev->bdev;
 			mbio->bi_end_io = raid10_end_write_request;
 			bio_set_op_attrs(mbio, op, do_sync | do_fua);
@@ -1457,8 +1463,7 @@ retry_write:
 			r10_bio->devs[i].repl_bio = mbio;
 
 			mbio->bi_iter.bi_sector = (r10_bio->devs[i].addr +
-					   choose_data_offset(
-						r10_bio, rdev));
+					   choose_data_offset(r10_bio, rdev));
 			mbio->bi_bdev = rdev->bdev;
 			mbio->bi_end_io = raid10_end_write_request;
 			bio_set_op_attrs(mbio, op, do_sync | do_fua);
@@ -1503,6 +1508,36 @@ retry_write:
 	one_write_done(r10_bio);
 }
 
+static void __make_request(struct mddev *mddev, struct bio *bio)
+{
+	struct r10conf *conf = mddev->private;
+	struct r10bio *r10_bio;
+
+	r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
+
+	r10_bio->master_bio = bio;
+	r10_bio->sectors = bio_sectors(bio);
+
+	r10_bio->mddev = mddev;
+	r10_bio->sector = bio->bi_iter.bi_sector;
+	r10_bio->state = 0;
+
+	/*
+	 * We might need to issue multiple reads to different devices if there
+	 * are bad blocks around, so we keep track of the number of reads in
+	 * bio->bi_phys_segments. If this is 0, there is only one r10_bio and
+	 * no locking will be needed when the request completes. If it is
+	 * non-zero, then it is the number of not-completed requests.
+	 */
+	bio->bi_phys_segments = 0;
+	bio_clear_flag(bio, BIO_SEG_VALID);
+
+	if (bio_data_dir(bio) == READ)
+		raid10_read_request(mddev, bio, r10_bio);
+	else
+		raid10_write_request(mddev, bio, r10_bio);
+}
+
 static void raid10_make_request(struct mddev *mddev, struct bio *bio)
 {
 	struct r10conf *conf = mddev->private;