Diffstat (limited to 'drivers/md/raid10.c')
 -rw-r--r--  drivers/md/raid10.c | 245
 1 file changed, 140 insertions(+), 105 deletions(-)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index ab5e86209322..1920756828df 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1087,23 +1087,122 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
         kfree(plug);
 }
 
-static void __make_request(struct mddev *mddev, struct bio *bio)
+static void raid10_read_request(struct mddev *mddev, struct bio *bio,
+                                struct r10bio *r10_bio)
 {
         struct r10conf *conf = mddev->private;
-        struct r10bio *r10_bio;
         struct bio *read_bio;
+        const int op = bio_op(bio);
+        const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
+        int sectors_handled;
+        int max_sectors;
+        sector_t sectors;
+        struct md_rdev *rdev;
+        int slot;
+
+        /*
+         * Register the new request and wait if the reconstruction
+         * thread has put up a bar for new requests.
+         * Continue immediately if no resync is active currently.
+         */
+        wait_barrier(conf);
+
+        sectors = bio_sectors(bio);
+        while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
+            bio->bi_iter.bi_sector < conf->reshape_progress &&
+            bio->bi_iter.bi_sector + sectors > conf->reshape_progress) {
+                /*
+                 * IO spans the reshape position.  Need to wait for reshape to
+                 * pass
+                 */
+                raid10_log(conf->mddev, "wait reshape");
+                allow_barrier(conf);
+                wait_event(conf->wait_barrier,
+                           conf->reshape_progress <= bio->bi_iter.bi_sector ||
+                           conf->reshape_progress >= bio->bi_iter.bi_sector +
+                           sectors);
+                wait_barrier(conf);
+        }
+
+read_again:
+        rdev = read_balance(conf, r10_bio, &max_sectors);
+        if (!rdev) {
+                raid_end_bio_io(r10_bio);
+                return;
+        }
+        slot = r10_bio->read_slot;
+
+        read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+        bio_trim(read_bio, r10_bio->sector - bio->bi_iter.bi_sector,
+                 max_sectors);
+
+        r10_bio->devs[slot].bio = read_bio;
+        r10_bio->devs[slot].rdev = rdev;
+
+        read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr +
+                choose_data_offset(r10_bio, rdev);
+        read_bio->bi_bdev = rdev->bdev;
+        read_bio->bi_end_io = raid10_end_read_request;
+        bio_set_op_attrs(read_bio, op, do_sync);
+        if (test_bit(FailFast, &rdev->flags) &&
+            test_bit(R10BIO_FailFast, &r10_bio->state))
+                read_bio->bi_opf |= MD_FAILFAST;
+        read_bio->bi_private = r10_bio;
+
+        if (mddev->gendisk)
+                trace_block_bio_remap(bdev_get_queue(read_bio->bi_bdev),
+                                      read_bio, disk_devt(mddev->gendisk),
+                                      r10_bio->sector);
+        if (max_sectors < r10_bio->sectors) {
+                /*
+                 * Could not read all from this device, so we will need another
+                 * r10_bio.
+                 */
+                sectors_handled = (r10_bio->sector + max_sectors
+                                   - bio->bi_iter.bi_sector);
+                r10_bio->sectors = max_sectors;
+                spin_lock_irq(&conf->device_lock);
+                if (bio->bi_phys_segments == 0)
+                        bio->bi_phys_segments = 2;
+                else
+                        bio->bi_phys_segments++;
+                spin_unlock_irq(&conf->device_lock);
+                /*
+                 * Cannot call generic_make_request directly as that will be
+                 * queued in __generic_make_request and subsequent
+                 * mempool_alloc might block waiting for it.  so hand bio over
+                 * to raid10d.
+                 */
+                reschedule_retry(r10_bio);
+
+                r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
+
+                r10_bio->master_bio = bio;
+                r10_bio->sectors = bio_sectors(bio) - sectors_handled;
+                r10_bio->state = 0;
+                r10_bio->mddev = mddev;
+                r10_bio->sector = bio->bi_iter.bi_sector + sectors_handled;
+                goto read_again;
+        } else
+                generic_make_request(read_bio);
+        return;
+}
+
+static void raid10_write_request(struct mddev *mddev, struct bio *bio,
+                                 struct r10bio *r10_bio)
+{
+        struct r10conf *conf = mddev->private;
         int i;
         const int op = bio_op(bio);
-        const int rw = bio_data_dir(bio);
         const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
         const unsigned long do_fua = (bio->bi_opf & REQ_FUA);
         unsigned long flags;
         struct md_rdev *blocked_rdev;
         struct blk_plug_cb *cb;
         struct raid10_plug_cb *plug = NULL;
+        sector_t sectors;
         int sectors_handled;
         int max_sectors;
-        int sectors;
 
         md_write_start(mddev, bio);
 
@@ -1118,8 +1217,9 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
         while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
             bio->bi_iter.bi_sector < conf->reshape_progress &&
             bio->bi_iter.bi_sector + sectors > conf->reshape_progress) {
-                /* IO spans the reshape position.  Need to wait for
-                 * reshape to pass
+                /*
+                 * IO spans the reshape position.  Need to wait for reshape to
+                 * pass
                  */
                 raid10_log(conf->mddev, "wait reshape");
                 allow_barrier(conf);
@@ -1129,8 +1229,8 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
                            sectors);
                 wait_barrier(conf);
         }
+
         if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
-            bio_data_dir(bio) == WRITE &&
             (mddev->reshape_backwards
              ? (bio->bi_iter.bi_sector < conf->reshape_safe &&
                 bio->bi_iter.bi_sector + sectors > conf->reshape_progress)
@@ -1148,98 +1248,6 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
                 conf->reshape_safe = mddev->reshape_position;
         }
 
-        r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
-
-        r10_bio->master_bio = bio;
-        r10_bio->sectors = sectors;
-
-        r10_bio->mddev = mddev;
-        r10_bio->sector = bio->bi_iter.bi_sector;
-        r10_bio->state = 0;
-
-        /* We might need to issue multiple reads to different
-         * devices if there are bad blocks around, so we keep
-         * track of the number of reads in bio->bi_phys_segments.
-         * If this is 0, there is only one r10_bio and no locking
-         * will be needed when the request completes.  If it is
-         * non-zero, then it is the number of not-completed requests.
-         */
-        bio->bi_phys_segments = 0;
-        bio_clear_flag(bio, BIO_SEG_VALID);
-
-        if (rw == READ) {
-                /*
-                 * read balancing logic:
-                 */
-                struct md_rdev *rdev;
-                int slot;
-
-read_again:
-                rdev = read_balance(conf, r10_bio, &max_sectors);
-                if (!rdev) {
-                        raid_end_bio_io(r10_bio);
-                        return;
-                }
-                slot = r10_bio->read_slot;
-
-                read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-                bio_trim(read_bio, r10_bio->sector - bio->bi_iter.bi_sector,
-                         max_sectors);
-
-                r10_bio->devs[slot].bio = read_bio;
-                r10_bio->devs[slot].rdev = rdev;
-
-                read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr +
-                        choose_data_offset(r10_bio, rdev);
-                read_bio->bi_bdev = rdev->bdev;
-                read_bio->bi_end_io = raid10_end_read_request;
-                bio_set_op_attrs(read_bio, op, do_sync);
-                if (test_bit(FailFast, &rdev->flags) &&
-                    test_bit(R10BIO_FailFast, &r10_bio->state))
-                        read_bio->bi_opf |= MD_FAILFAST;
-                read_bio->bi_private = r10_bio;
-
-                if (mddev->gendisk)
-                        trace_block_bio_remap(bdev_get_queue(read_bio->bi_bdev),
-                                              read_bio, disk_devt(mddev->gendisk),
-                                              r10_bio->sector);
-                if (max_sectors < r10_bio->sectors) {
-                        /* Could not read all from this device, so we will
-                         * need another r10_bio.
-                         */
-                        sectors_handled = (r10_bio->sector + max_sectors
-                                           - bio->bi_iter.bi_sector);
-                        r10_bio->sectors = max_sectors;
-                        spin_lock_irq(&conf->device_lock);
-                        if (bio->bi_phys_segments == 0)
-                                bio->bi_phys_segments = 2;
-                        else
-                                bio->bi_phys_segments++;
-                        spin_unlock_irq(&conf->device_lock);
-                        /* Cannot call generic_make_request directly
-                         * as that will be queued in __generic_make_request
-                         * and subsequent mempool_alloc might block
-                         * waiting for it.  so hand bio over to raid10d.
-                         */
-                        reschedule_retry(r10_bio);
-
-                        r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
-
-                        r10_bio->master_bio = bio;
-                        r10_bio->sectors = bio_sectors(bio) - sectors_handled;
-                        r10_bio->state = 0;
-                        r10_bio->mddev = mddev;
-                        r10_bio->sector = bio->bi_iter.bi_sector +
-                                sectors_handled;
-                        goto read_again;
-                } else
-                        generic_make_request(read_bio);
-                return;
-        }
-
-        /*
-         * WRITE:
-         */
         if (conf->pending_count >= max_queued_requests) {
                 md_wakeup_thread(mddev->thread);
                 raid10_log(mddev, "wait queued");
@@ -1300,8 +1308,7 @@ retry_write:
                         int bad_sectors;
                         int is_bad;
 
-                        is_bad = is_badblock(rdev, dev_sector,
-                                             max_sectors,
+                        is_bad = is_badblock(rdev, dev_sector, max_sectors,
                                              &first_bad, &bad_sectors);
                         if (is_bad < 0) {
                                 /* Mustn't write here until the bad block
@@ -1405,8 +1412,7 @@ retry_write:
                 r10_bio->devs[i].bio = mbio;
 
                 mbio->bi_iter.bi_sector = (r10_bio->devs[i].addr+
-                                   choose_data_offset(r10_bio,
-                                                      rdev));
+                                   choose_data_offset(r10_bio, rdev));
                 mbio->bi_bdev = rdev->bdev;
                 mbio->bi_end_io = raid10_end_write_request;
                 bio_set_op_attrs(mbio, op, do_sync | do_fua);
@@ -1457,8 +1463,7 @@ retry_write:
                 r10_bio->devs[i].repl_bio = mbio;
 
                 mbio->bi_iter.bi_sector = (r10_bio->devs[i].addr +
-                                   choose_data_offset(
-                                           r10_bio, rdev));
+                                   choose_data_offset(r10_bio, rdev));
                 mbio->bi_bdev = rdev->bdev;
                 mbio->bi_end_io = raid10_end_write_request;
                 bio_set_op_attrs(mbio, op, do_sync | do_fua);
@@ -1503,6 +1508,36 @@ retry_write:
         one_write_done(r10_bio);
 }
 
+static void __make_request(struct mddev *mddev, struct bio *bio)
+{
+        struct r10conf *conf = mddev->private;
+        struct r10bio *r10_bio;
+
+        r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
+
+        r10_bio->master_bio = bio;
+        r10_bio->sectors = bio_sectors(bio);
+
+        r10_bio->mddev = mddev;
+        r10_bio->sector = bio->bi_iter.bi_sector;
+        r10_bio->state = 0;
+
+        /*
+         * We might need to issue multiple reads to different devices if there
+         * are bad blocks around, so we keep track of the number of reads in
+         * bio->bi_phys_segments.  If this is 0, there is only one r10_bio and
+         * no locking will be needed when the request completes.  If it is
+         * non-zero, then it is the number of not-completed requests.
+         */
+        bio->bi_phys_segments = 0;
+        bio_clear_flag(bio, BIO_SEG_VALID);
+
+        if (bio_data_dir(bio) == READ)
+                raid10_read_request(mddev, bio, r10_bio);
+        else
+                raid10_write_request(mddev, bio, r10_bio);
+}
+
 static void raid10_make_request(struct mddev *mddev, struct bio *bio)
 {
         struct r10conf *conf = mddev->private;
