author     Linus Torvalds <torvalds@linux-foundation.org>  2017-01-12 13:17:59 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2017-01-12 13:17:59 -0500
commit     607ae5f26920b8dfedbbf882c0f9edd3b9aa6cf7 (patch)
tree       2bd4990aaef9b2aeabae1c24d6ff388c5238142f
parent     ba836a6f5ab1243ff5e08a941a2d1de8b31244e1 (diff)
parent     32cd7cbbacf700885a2316275f188f2d5739b5f4 (diff)
Merge tag 'md/4.10-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md
Pull md fixes from Shaohua Li:
"Basically one fix for raid5 cache which is merged in this cycle,
others are trival fixes"
* tag 'md/4.10-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md:
md/raid5: Use correct IS_ERR() variation on pointer check
md: cleanup mddev flag clear for takeover
md/r5cache: fix spelling mistake on "recoverying"
md/r5cache: assign conf->log before r5l_load_log()
md/r5cache: simplify handling of sh->log_start in recovery
md/raid5-cache: removes unnecessary write-through mode judgments
md/raid10: Refactor raid10_make_request
md/raid1: Refactor raid1_make_request
-rw-r--r--  drivers/md/md.h          |   8
-rw-r--r--  drivers/md/raid0.c       |  12
-rw-r--r--  drivers/md/raid1.c       | 275
-rw-r--r--  drivers/md/raid10.c      | 245
-rw-r--r--  drivers/md/raid5-cache.c |  36
-rw-r--r--  drivers/md/raid5.c       |   7
6 files changed, 322 insertions, 261 deletions
diff --git a/drivers/md/md.h b/drivers/md/md.h
index e38936d05df1..2a514036a83d 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -212,6 +212,7 @@ extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
                                 int is_new);
 struct md_cluster_info;
 
+/* change UNSUPPORTED_MDDEV_FLAGS for each array type if new flag is added */
 enum mddev_flags {
         MD_ARRAY_FIRST_USE,     /* First use of array, needs initialization */
         MD_CLOSING,             /* If set, we are closing the array, do not open
@@ -702,4 +703,11 @@ static inline int mddev_is_clustered(struct mddev *mddev)
 {
         return mddev->cluster_info && mddev->bitmap_info.nodes > 1;
 }
+
+/* clear unsupported mddev_flags */
+static inline void mddev_clear_unsupported_flags(struct mddev *mddev,
+        unsigned long unsupported_flags)
+{
+        mddev->flags &= ~unsupported_flags;
+}
 #endif /* _MD_MD_H */
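
The helper above only becomes meaningful together with the per-personality UNSUPPORTED_MDDEV_FLAGS masks added in the raid0/raid1/raid5 hunks below. As a quick illustration of the pattern, here is a minimal, self-contained userspace model (stand-in flag bit numbers and a stub struct mddev; this is not additional kernel code from the series):

#include <stdio.h>

/* Stand-in bit numbers; the kernel takes these from enum mddev_flags. */
#define MD_HAS_JOURNAL          3
#define MD_JOURNAL_CLEAN        4
#define MD_FAILFAST_SUPPORTED   5

/* Mask of flags a personality cannot honour, as in the raid0 hunk below. */
#define UNSUPPORTED_MDDEV_FLAGS                 \
        ((1L << MD_HAS_JOURNAL) |               \
         (1L << MD_JOURNAL_CLEAN) |             \
         (1L << MD_FAILFAST_SUPPORTED))

struct mddev { unsigned long flags; };

/* Same shape as the helper added to md.h above. */
static inline void mddev_clear_unsupported_flags(struct mddev *mddev,
        unsigned long unsupported_flags)
{
        mddev->flags &= ~unsupported_flags;
}

int main(void)
{
        struct mddev m = {
                .flags = (1L << MD_HAS_JOURNAL) | (1L << MD_FAILFAST_SUPPORTED)
        };

        /* One call replaces the per-flag clear_bit() sequence in takeover paths. */
        mddev_clear_unsupported_flags(&m, UNSUPPORTED_MDDEV_FLAGS);
        printf("flags after takeover cleanup: %#lx\n", m.flags);
        return 0;
}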
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index a162fedeb51a..848365d474f3 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -26,6 +26,11 @@
 #include "raid0.h"
 #include "raid5.h"
 
+#define UNSUPPORTED_MDDEV_FLAGS         \
+        ((1L << MD_HAS_JOURNAL) |       \
+         (1L << MD_JOURNAL_CLEAN) |     \
+         (1L << MD_FAILFAST_SUPPORTED))
+
 static int raid0_congested(struct mddev *mddev, int bits)
 {
         struct r0conf *conf = mddev->private;
@@ -539,8 +544,7 @@ static void *raid0_takeover_raid45(struct mddev *mddev)
         mddev->delta_disks = -1;
         /* make sure it will be not marked as dirty */
         mddev->recovery_cp = MaxSector;
-        clear_bit(MD_HAS_JOURNAL, &mddev->flags);
-        clear_bit(MD_JOURNAL_CLEAN, &mddev->flags);
+        mddev_clear_unsupported_flags(mddev, UNSUPPORTED_MDDEV_FLAGS);
 
         create_strip_zones(mddev, &priv_conf);
 
@@ -583,7 +587,7 @@ static void *raid0_takeover_raid10(struct mddev *mddev)
         mddev->degraded = 0;
         /* make sure it will be not marked as dirty */
         mddev->recovery_cp = MaxSector;
-        clear_bit(MD_FAILFAST_SUPPORTED, &mddev->flags);
+        mddev_clear_unsupported_flags(mddev, UNSUPPORTED_MDDEV_FLAGS);
 
         create_strip_zones(mddev, &priv_conf);
         return priv_conf;
@@ -626,7 +630,7 @@ static void *raid0_takeover_raid1(struct mddev *mddev)
         mddev->raid_disks = 1;
         /* make sure it will be not marked as dirty */
         mddev->recovery_cp = MaxSector;
-        clear_bit(MD_FAILFAST_SUPPORTED, &mddev->flags);
+        mddev_clear_unsupported_flags(mddev, UNSUPPORTED_MDDEV_FLAGS);
 
         create_strip_zones(mddev, &priv_conf);
         return priv_conf;
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index a1f3fbed9100..7b0f647bcccb 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -42,6 +42,10 @@
 #include "raid1.h"
 #include "bitmap.h"
 
+#define UNSUPPORTED_MDDEV_FLAGS         \
+        ((1L << MD_HAS_JOURNAL) |       \
+         (1L << MD_JOURNAL_CLEAN))
+
 /*
  * Number of guaranteed r1bios in case of extreme VM load:
  */
@@ -1066,17 +1070,107 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
         kfree(plug);
 }
 
-static void raid1_make_request(struct mddev *mddev, struct bio * bio)
+static void raid1_read_request(struct mddev *mddev, struct bio *bio,
+                                struct r1bio *r1_bio)
 {
         struct r1conf *conf = mddev->private;
         struct raid1_info *mirror;
-        struct r1bio *r1_bio;
         struct bio *read_bio;
+        struct bitmap *bitmap = mddev->bitmap;
+        const int op = bio_op(bio);
+        const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
+        int sectors_handled;
+        int max_sectors;
+        int rdisk;
+
+        wait_barrier(conf, bio);
+
+read_again:
+        rdisk = read_balance(conf, r1_bio, &max_sectors);
+
+        if (rdisk < 0) {
+                /* couldn't find anywhere to read from */
+                raid_end_bio_io(r1_bio);
+                return;
+        }
+        mirror = conf->mirrors + rdisk;
+
+        if (test_bit(WriteMostly, &mirror->rdev->flags) &&
+            bitmap) {
+                /*
+                 * Reading from a write-mostly device must take care not to
+                 * over-take any writes that are 'behind'
+                 */
+                raid1_log(mddev, "wait behind writes");
+                wait_event(bitmap->behind_wait,
+                           atomic_read(&bitmap->behind_writes) == 0);
+        }
+        r1_bio->read_disk = rdisk;
+        r1_bio->start_next_window = 0;
+
+        read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+        bio_trim(read_bio, r1_bio->sector - bio->bi_iter.bi_sector,
+                 max_sectors);
+
+        r1_bio->bios[rdisk] = read_bio;
+
+        read_bio->bi_iter.bi_sector = r1_bio->sector +
+                mirror->rdev->data_offset;
+        read_bio->bi_bdev = mirror->rdev->bdev;
+        read_bio->bi_end_io = raid1_end_read_request;
+        bio_set_op_attrs(read_bio, op, do_sync);
+        if (test_bit(FailFast, &mirror->rdev->flags) &&
+            test_bit(R1BIO_FailFast, &r1_bio->state))
+                read_bio->bi_opf |= MD_FAILFAST;
+        read_bio->bi_private = r1_bio;
+
+        if (mddev->gendisk)
+                trace_block_bio_remap(bdev_get_queue(read_bio->bi_bdev),
+                                      read_bio, disk_devt(mddev->gendisk),
+                                      r1_bio->sector);
+
+        if (max_sectors < r1_bio->sectors) {
+                /*
+                 * could not read all from this device, so we will need another
+                 * r1_bio.
+                 */
+                sectors_handled = (r1_bio->sector + max_sectors
+                                   - bio->bi_iter.bi_sector);
+                r1_bio->sectors = max_sectors;
+                spin_lock_irq(&conf->device_lock);
+                if (bio->bi_phys_segments == 0)
+                        bio->bi_phys_segments = 2;
+                else
+                        bio->bi_phys_segments++;
+                spin_unlock_irq(&conf->device_lock);
+
+                /*
+                 * Cannot call generic_make_request directly as that will be
+                 * queued in __make_request and subsequent mempool_alloc might
+                 * block waiting for it. So hand bio over to raid1d.
+                 */
+                reschedule_retry(r1_bio);
+
+                r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
+
+                r1_bio->master_bio = bio;
+                r1_bio->sectors = bio_sectors(bio) - sectors_handled;
+                r1_bio->state = 0;
+                r1_bio->mddev = mddev;
+                r1_bio->sector = bio->bi_iter.bi_sector + sectors_handled;
+                goto read_again;
+        } else
+                generic_make_request(read_bio);
+}
+
+static void raid1_write_request(struct mddev *mddev, struct bio *bio,
+                                struct r1bio *r1_bio)
+{
+        struct r1conf *conf = mddev->private;
         int i, disks;
-        struct bitmap *bitmap;
+        struct bitmap *bitmap = mddev->bitmap;
         unsigned long flags;
         const int op = bio_op(bio);
-        const int rw = bio_data_dir(bio);
         const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
         const unsigned long do_flush_fua = (bio->bi_opf &
                                                 (REQ_PREFLUSH | REQ_FUA));
@@ -1096,15 +1190,15 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio)
 
         md_write_start(mddev, bio); /* wait on superblock update early */
 
-        if (bio_data_dir(bio) == WRITE &&
-            ((bio_end_sector(bio) > mddev->suspend_lo &&
+        if ((bio_end_sector(bio) > mddev->suspend_lo &&
             bio->bi_iter.bi_sector < mddev->suspend_hi) ||
             (mddev_is_clustered(mddev) &&
              md_cluster_ops->area_resyncing(mddev, WRITE,
-                     bio->bi_iter.bi_sector, bio_end_sector(bio))))) {
-                /* As the suspend_* range is controlled by
-                 * userspace, we want an interruptible
-                 * wait.
+                     bio->bi_iter.bi_sector, bio_end_sector(bio)))) {
+
+                /*
+                 * As the suspend_* range is controlled by userspace, we want
+                 * an interruptible wait.
                  */
                 DEFINE_WAIT(w);
                 for (;;) {
@@ -1115,128 +1209,15 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio)
                             bio->bi_iter.bi_sector >= mddev->suspend_hi ||
                             (mddev_is_clustered(mddev) &&
                              !md_cluster_ops->area_resyncing(mddev, WRITE,
-                                     bio->bi_iter.bi_sector, bio_end_sector(bio))))
+                                     bio->bi_iter.bi_sector,
+                                     bio_end_sector(bio))))
                                 break;
                         schedule();
                 }
                 finish_wait(&conf->wait_barrier, &w);
         }
-
         start_next_window = wait_barrier(conf, bio);
 
-        bitmap = mddev->bitmap;
-
-        /*
-         * make_request() can abort the operation when read-ahead is being
-         * used and no empty request is available.
-         *
-         */
-        r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
-
-        r1_bio->master_bio = bio;
-        r1_bio->sectors = bio_sectors(bio);
-        r1_bio->state = 0;
-        r1_bio->mddev = mddev;
-        r1_bio->sector = bio->bi_iter.bi_sector;
-
-        /* We might need to issue multiple reads to different
-         * devices if there are bad blocks around, so we keep
-         * track of the number of reads in bio->bi_phys_segments.
-         * If this is 0, there is only one r1_bio and no locking
-         * will be needed when requests complete.  If it is
-         * non-zero, then it is the number of not-completed requests.
-         */
-        bio->bi_phys_segments = 0;
-        bio_clear_flag(bio, BIO_SEG_VALID);
-
-        if (rw == READ) {
-                /*
-                 * read balancing logic:
-                 */
-                int rdisk;
-
-read_again:
-                rdisk = read_balance(conf, r1_bio, &max_sectors);
-
-                if (rdisk < 0) {
-                        /* couldn't find anywhere to read from */
-                        raid_end_bio_io(r1_bio);
-                        return;
-                }
-                mirror = conf->mirrors + rdisk;
-
-                if (test_bit(WriteMostly, &mirror->rdev->flags) &&
-                    bitmap) {
-                        /* Reading from a write-mostly device must
-                         * take care not to over-take any writes
-                         * that are 'behind'
-                         */
-                        raid1_log(mddev, "wait behind writes");
-                        wait_event(bitmap->behind_wait,
-                                   atomic_read(&bitmap->behind_writes) == 0);
-                }
-                r1_bio->read_disk = rdisk;
-                r1_bio->start_next_window = 0;
-
-                read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-                bio_trim(read_bio, r1_bio->sector - bio->bi_iter.bi_sector,
-                         max_sectors);
-
-                r1_bio->bios[rdisk] = read_bio;
-
-                read_bio->bi_iter.bi_sector = r1_bio->sector +
-                        mirror->rdev->data_offset;
-                read_bio->bi_bdev = mirror->rdev->bdev;
-                read_bio->bi_end_io = raid1_end_read_request;
-                bio_set_op_attrs(read_bio, op, do_sync);
-                if (test_bit(FailFast, &mirror->rdev->flags) &&
-                    test_bit(R1BIO_FailFast, &r1_bio->state))
-                        read_bio->bi_opf |= MD_FAILFAST;
-                read_bio->bi_private = r1_bio;
-
-                if (mddev->gendisk)
-                        trace_block_bio_remap(bdev_get_queue(read_bio->bi_bdev),
-                                              read_bio, disk_devt(mddev->gendisk),
-                                              r1_bio->sector);
-
-                if (max_sectors < r1_bio->sectors) {
-                        /* could not read all from this device, so we will
-                         * need another r1_bio.
-                         */
-
-                        sectors_handled = (r1_bio->sector + max_sectors
-                                           - bio->bi_iter.bi_sector);
-                        r1_bio->sectors = max_sectors;
-                        spin_lock_irq(&conf->device_lock);
-                        if (bio->bi_phys_segments == 0)
-                                bio->bi_phys_segments = 2;
-                        else
-                                bio->bi_phys_segments++;
-                        spin_unlock_irq(&conf->device_lock);
-                        /* Cannot call generic_make_request directly
-                         * as that will be queued in __make_request
-                         * and subsequent mempool_alloc might block waiting
-                         * for it. So hand bio over to raid1d.
-                         */
-                        reschedule_retry(r1_bio);
-
-                        r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
-
-                        r1_bio->master_bio = bio;
-                        r1_bio->sectors = bio_sectors(bio) - sectors_handled;
-                        r1_bio->state = 0;
-                        r1_bio->mddev = mddev;
-                        r1_bio->sector = bio->bi_iter.bi_sector +
-                                sectors_handled;
-                        goto read_again;
-                } else
-                        generic_make_request(read_bio);
-                return;
-        }
-
-        /*
-         * WRITE:
-         */
         if (conf->pending_count >= max_queued_requests) {
                 md_wakeup_thread(mddev->thread);
                 raid1_log(mddev, "wait queued");
@@ -1280,8 +1261,7 @@ read_again:
                         int bad_sectors;
                         int is_bad;
 
-                        is_bad = is_badblock(rdev, r1_bio->sector,
-                                             max_sectors,
+                        is_bad = is_badblock(rdev, r1_bio->sector, max_sectors,
                                              &first_bad, &bad_sectors);
                         if (is_bad < 0) {
                                 /* mustn't write here until the bad block is
@@ -1370,7 +1350,8 @@ read_again:
                         continue;
 
                 mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-                bio_trim(mbio, r1_bio->sector - bio->bi_iter.bi_sector, max_sectors);
+                bio_trim(mbio, r1_bio->sector - bio->bi_iter.bi_sector,
+                         max_sectors);
 
                 if (first_clone) {
                         /* do behind I/O ?
@@ -1464,6 +1445,40 @@ read_again:
         wake_up(&conf->wait_barrier);
 }
 
+static void raid1_make_request(struct mddev *mddev, struct bio *bio)
+{
+        struct r1conf *conf = mddev->private;
+        struct r1bio *r1_bio;
+
+        /*
+         * make_request() can abort the operation when read-ahead is being
+         * used and no empty request is available.
+         *
+         */
+        r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
+
+        r1_bio->master_bio = bio;
+        r1_bio->sectors = bio_sectors(bio);
+        r1_bio->state = 0;
+        r1_bio->mddev = mddev;
+        r1_bio->sector = bio->bi_iter.bi_sector;
+
+        /*
+         * We might need to issue multiple reads to different devices if there
+         * are bad blocks around, so we keep track of the number of reads in
+         * bio->bi_phys_segments.  If this is 0, there is only one r1_bio and
+         * no locking will be needed when requests complete.  If it is
+         * non-zero, then it is the number of not-completed requests.
+         */
+        bio->bi_phys_segments = 0;
+        bio_clear_flag(bio, BIO_SEG_VALID);
+
+        if (bio_data_dir(bio) == READ)
+                raid1_read_request(mddev, bio, r1_bio);
+        else
+                raid1_write_request(mddev, bio, r1_bio);
+}
+
 static void raid1_status(struct seq_file *seq, struct mddev *mddev)
 {
         struct r1conf *conf = mddev->private;
@@ -3246,8 +3261,8 @@ static void *raid1_takeover(struct mddev *mddev)
         if (!IS_ERR(conf)) {
                 /* Array must appear to be quiesced */
                 conf->array_frozen = 1;
-                clear_bit(MD_HAS_JOURNAL, &mddev->flags);
-                clear_bit(MD_JOURNAL_CLEAN, &mddev->flags);
+                mddev_clear_unsupported_flags(mddev,
+                        UNSUPPORTED_MDDEV_FLAGS);
         }
         return conf;
 }
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index ab5e86209322..1920756828df 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1087,23 +1087,122 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
         kfree(plug);
 }
 
-static void __make_request(struct mddev *mddev, struct bio *bio)
+static void raid10_read_request(struct mddev *mddev, struct bio *bio,
+                                struct r10bio *r10_bio)
 {
         struct r10conf *conf = mddev->private;
-        struct r10bio *r10_bio;
         struct bio *read_bio;
+        const int op = bio_op(bio);
+        const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
+        int sectors_handled;
+        int max_sectors;
+        sector_t sectors;
+        struct md_rdev *rdev;
+        int slot;
+
+        /*
+         * Register the new request and wait if the reconstruction
+         * thread has put up a bar for new requests.
+         * Continue immediately if no resync is active currently.
+         */
+        wait_barrier(conf);
+
+        sectors = bio_sectors(bio);
+        while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
+            bio->bi_iter.bi_sector < conf->reshape_progress &&
+            bio->bi_iter.bi_sector + sectors > conf->reshape_progress) {
+                /*
+                 * IO spans the reshape position. Need to wait for reshape to
+                 * pass
+                 */
+                raid10_log(conf->mddev, "wait reshape");
+                allow_barrier(conf);
+                wait_event(conf->wait_barrier,
+                           conf->reshape_progress <= bio->bi_iter.bi_sector ||
+                           conf->reshape_progress >= bio->bi_iter.bi_sector +
+                           sectors);
+                wait_barrier(conf);
+        }
+
+read_again:
+        rdev = read_balance(conf, r10_bio, &max_sectors);
+        if (!rdev) {
+                raid_end_bio_io(r10_bio);
+                return;
+        }
+        slot = r10_bio->read_slot;
+
+        read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+        bio_trim(read_bio, r10_bio->sector - bio->bi_iter.bi_sector,
+                 max_sectors);
+
+        r10_bio->devs[slot].bio = read_bio;
+        r10_bio->devs[slot].rdev = rdev;
+
+        read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr +
+                choose_data_offset(r10_bio, rdev);
+        read_bio->bi_bdev = rdev->bdev;
+        read_bio->bi_end_io = raid10_end_read_request;
+        bio_set_op_attrs(read_bio, op, do_sync);
+        if (test_bit(FailFast, &rdev->flags) &&
+            test_bit(R10BIO_FailFast, &r10_bio->state))
+                read_bio->bi_opf |= MD_FAILFAST;
+        read_bio->bi_private = r10_bio;
+
+        if (mddev->gendisk)
+                trace_block_bio_remap(bdev_get_queue(read_bio->bi_bdev),
+                                      read_bio, disk_devt(mddev->gendisk),
+                                      r10_bio->sector);
+        if (max_sectors < r10_bio->sectors) {
+                /*
+                 * Could not read all from this device, so we will need another
+                 * r10_bio.
+                 */
+                sectors_handled = (r10_bio->sector + max_sectors
+                                   - bio->bi_iter.bi_sector);
+                r10_bio->sectors = max_sectors;
+                spin_lock_irq(&conf->device_lock);
+                if (bio->bi_phys_segments == 0)
+                        bio->bi_phys_segments = 2;
+                else
+                        bio->bi_phys_segments++;
+                spin_unlock_irq(&conf->device_lock);
+                /*
+                 * Cannot call generic_make_request directly as that will be
+                 * queued in __generic_make_request and subsequent
+                 * mempool_alloc might block waiting for it. so hand bio over
+                 * to raid10d.
+                 */
+                reschedule_retry(r10_bio);
+
+                r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
+
+                r10_bio->master_bio = bio;
+                r10_bio->sectors = bio_sectors(bio) - sectors_handled;
+                r10_bio->state = 0;
+                r10_bio->mddev = mddev;
+                r10_bio->sector = bio->bi_iter.bi_sector + sectors_handled;
+                goto read_again;
+        } else
+                generic_make_request(read_bio);
+        return;
+}
+
+static void raid10_write_request(struct mddev *mddev, struct bio *bio,
+                                 struct r10bio *r10_bio)
+{
+        struct r10conf *conf = mddev->private;
         int i;
         const int op = bio_op(bio);
-        const int rw = bio_data_dir(bio);
         const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
         const unsigned long do_fua = (bio->bi_opf & REQ_FUA);
         unsigned long flags;
         struct md_rdev *blocked_rdev;
         struct blk_plug_cb *cb;
         struct raid10_plug_cb *plug = NULL;
+        sector_t sectors;
         int sectors_handled;
         int max_sectors;
-        int sectors;
 
         md_write_start(mddev, bio);
 
@@ -1118,8 +1217,9 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
         while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
             bio->bi_iter.bi_sector < conf->reshape_progress &&
             bio->bi_iter.bi_sector + sectors > conf->reshape_progress) {
-                /* IO spans the reshape position. Need to wait for
-                 * reshape to pass
+                /*
+                 * IO spans the reshape position. Need to wait for reshape to
+                 * pass
                  */
                 raid10_log(conf->mddev, "wait reshape");
                 allow_barrier(conf);
@@ -1129,8 +1229,8 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
                            sectors);
                 wait_barrier(conf);
         }
+
         if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
-            bio_data_dir(bio) == WRITE &&
             (mddev->reshape_backwards
              ? (bio->bi_iter.bi_sector < conf->reshape_safe &&
                 bio->bi_iter.bi_sector + sectors > conf->reshape_progress)
@@ -1148,98 +1248,6 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
                 conf->reshape_safe = mddev->reshape_position;
         }
 
-        r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
-
-        r10_bio->master_bio = bio;
-        r10_bio->sectors = sectors;
-
-        r10_bio->mddev = mddev;
-        r10_bio->sector = bio->bi_iter.bi_sector;
-        r10_bio->state = 0;
-
-        /* We might need to issue multiple reads to different
-         * devices if there are bad blocks around, so we keep
-         * track of the number of reads in bio->bi_phys_segments.
-         * If this is 0, there is only one r10_bio and no locking
-         * will be needed when the request completes.  If it is
-         * non-zero, then it is the number of not-completed requests.
-         */
-        bio->bi_phys_segments = 0;
-        bio_clear_flag(bio, BIO_SEG_VALID);
-
-        if (rw == READ) {
-                /*
-                 * read balancing logic:
-                 */
-                struct md_rdev *rdev;
-                int slot;
-
-read_again:
-                rdev = read_balance(conf, r10_bio, &max_sectors);
-                if (!rdev) {
-                        raid_end_bio_io(r10_bio);
-                        return;
-                }
-                slot = r10_bio->read_slot;
-
-                read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-                bio_trim(read_bio, r10_bio->sector - bio->bi_iter.bi_sector,
-                         max_sectors);
-
-                r10_bio->devs[slot].bio = read_bio;
-                r10_bio->devs[slot].rdev = rdev;
-
-                read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr +
-                        choose_data_offset(r10_bio, rdev);
-                read_bio->bi_bdev = rdev->bdev;
-                read_bio->bi_end_io = raid10_end_read_request;
-                bio_set_op_attrs(read_bio, op, do_sync);
-                if (test_bit(FailFast, &rdev->flags) &&
-                    test_bit(R10BIO_FailFast, &r10_bio->state))
-                        read_bio->bi_opf |= MD_FAILFAST;
-                read_bio->bi_private = r10_bio;
-
-                if (mddev->gendisk)
-                        trace_block_bio_remap(bdev_get_queue(read_bio->bi_bdev),
-                                              read_bio, disk_devt(mddev->gendisk),
-                                              r10_bio->sector);
-                if (max_sectors < r10_bio->sectors) {
-                        /* Could not read all from this device, so we will
-                         * need another r10_bio.
-                         */
-                        sectors_handled = (r10_bio->sector + max_sectors
-                                           - bio->bi_iter.bi_sector);
-                        r10_bio->sectors = max_sectors;
-                        spin_lock_irq(&conf->device_lock);
-                        if (bio->bi_phys_segments == 0)
-                                bio->bi_phys_segments = 2;
-                        else
-                                bio->bi_phys_segments++;
-                        spin_unlock_irq(&conf->device_lock);
-                        /* Cannot call generic_make_request directly
-                         * as that will be queued in __generic_make_request
-                         * and subsequent mempool_alloc might block
-                         * waiting for it. so hand bio over to raid10d.
-                         */
-                        reschedule_retry(r10_bio);
-
-                        r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
-
-                        r10_bio->master_bio = bio;
-                        r10_bio->sectors = bio_sectors(bio) - sectors_handled;
-                        r10_bio->state = 0;
-                        r10_bio->mddev = mddev;
-                        r10_bio->sector = bio->bi_iter.bi_sector +
-                                sectors_handled;
-                        goto read_again;
-                } else
-                        generic_make_request(read_bio);
-                return;
-        }
-
-        /*
-         * WRITE:
-         */
         if (conf->pending_count >= max_queued_requests) {
                 md_wakeup_thread(mddev->thread);
                 raid10_log(mddev, "wait queued");
@@ -1300,8 +1308,7 @@ retry_write:
                         int bad_sectors;
                         int is_bad;
 
-                        is_bad = is_badblock(rdev, dev_sector,
-                                             max_sectors,
+                        is_bad = is_badblock(rdev, dev_sector, max_sectors,
                                              &first_bad, &bad_sectors);
                         if (is_bad < 0) {
                                 /* Mustn't write here until the bad block
@@ -1405,8 +1412,7 @@ retry_write:
                 r10_bio->devs[i].bio = mbio;
 
                 mbio->bi_iter.bi_sector = (r10_bio->devs[i].addr+
-                                   choose_data_offset(r10_bio,
-                                                      rdev));
+                                   choose_data_offset(r10_bio, rdev));
                 mbio->bi_bdev = rdev->bdev;
                 mbio->bi_end_io = raid10_end_write_request;
                 bio_set_op_attrs(mbio, op, do_sync | do_fua);
@@ -1457,8 +1463,7 @@ retry_write:
                 r10_bio->devs[i].repl_bio = mbio;
 
                 mbio->bi_iter.bi_sector = (r10_bio->devs[i].addr +
-                                   choose_data_offset(
-                                           r10_bio, rdev));
+                                   choose_data_offset(r10_bio, rdev));
                 mbio->bi_bdev = rdev->bdev;
                 mbio->bi_end_io = raid10_end_write_request;
                 bio_set_op_attrs(mbio, op, do_sync | do_fua);
@@ -1503,6 +1508,36 @@ retry_write:
         one_write_done(r10_bio);
 }
 
+static void __make_request(struct mddev *mddev, struct bio *bio)
+{
+        struct r10conf *conf = mddev->private;
+        struct r10bio *r10_bio;
+
+        r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
+
+        r10_bio->master_bio = bio;
+        r10_bio->sectors = bio_sectors(bio);
+
+        r10_bio->mddev = mddev;
+        r10_bio->sector = bio->bi_iter.bi_sector;
+        r10_bio->state = 0;
+
+        /*
+         * We might need to issue multiple reads to different devices if there
+         * are bad blocks around, so we keep track of the number of reads in
+         * bio->bi_phys_segments.  If this is 0, there is only one r10_bio and
+         * no locking will be needed when the request completes.  If it is
+         * non-zero, then it is the number of not-completed requests.
+         */
+        bio->bi_phys_segments = 0;
+        bio_clear_flag(bio, BIO_SEG_VALID);
+
+        if (bio_data_dir(bio) == READ)
+                raid10_read_request(mddev, bio, r10_bio);
+        else
+                raid10_write_request(mddev, bio, r10_bio);
+}
+
 static void raid10_make_request(struct mddev *mddev, struct bio *bio)
 {
         struct r10conf *conf = mddev->private;
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index d7bfb6fc8aef..0e8ed2c327b0 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -1682,8 +1682,7 @@ out:
 
 static struct stripe_head *
 r5c_recovery_alloc_stripe(struct r5conf *conf,
-                          sector_t stripe_sect,
-                          sector_t log_start)
+                          sector_t stripe_sect)
 {
         struct stripe_head *sh;
 
@@ -1692,7 +1691,6 @@ r5c_recovery_alloc_stripe(struct r5conf *conf,
                 return NULL;  /* no more stripe available */
 
         r5l_recovery_reset_stripe(sh);
-        sh->log_start = log_start;
 
         return sh;
 }
@@ -1862,7 +1860,7 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log,
                                                 stripe_sect);
 
                 if (!sh) {
-                        sh = r5c_recovery_alloc_stripe(conf, stripe_sect, ctx->pos);
+                        sh = r5c_recovery_alloc_stripe(conf, stripe_sect);
                         /*
                          * cannot get stripe from raid5_get_active_stripe
                          * try replay some stripes
@@ -1871,7 +1869,7 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log,
                                 r5c_recovery_replay_stripes(
                                         cached_stripe_list, ctx);
                                 sh = r5c_recovery_alloc_stripe(
-                                        conf, stripe_sect, ctx->pos);
+                                        conf, stripe_sect);
                         }
                         if (!sh) {
                                 pr_debug("md/raid:%s: Increasing stripe cache size to %d to recovery data on journal.\n",
@@ -1879,8 +1877,8 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log,
                                          conf->min_nr_stripes * 2);
                                 raid5_set_cache_size(mddev,
                                                      conf->min_nr_stripes * 2);
-                                sh = r5c_recovery_alloc_stripe(
-                                        conf, stripe_sect, ctx->pos);
+                                sh = r5c_recovery_alloc_stripe(conf,
+                                                               stripe_sect);
                         }
                         if (!sh) {
                                 pr_err("md/raid:%s: Cannot get enough stripes due to memory pressure. Recovery failed.\n",
@@ -1894,7 +1892,6 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log,
                 if (!test_bit(STRIPE_R5C_CACHING, &sh->state) &&
                     test_bit(R5_Wantwrite, &sh->dev[sh->pd_idx].flags)) {
                         r5l_recovery_replay_one_stripe(conf, sh, ctx);
-                        sh->log_start = ctx->pos;
                         list_move_tail(&sh->lru, cached_stripe_list);
                 }
                 r5l_recovery_load_data(log, sh, ctx, payload,
@@ -1933,8 +1930,6 @@ static void r5c_recovery_load_one_stripe(struct r5l_log *log,
                         set_bit(R5_UPTODATE, &dev->flags);
                 }
         }
-        list_add_tail(&sh->r5c, &log->stripe_in_journal_list);
-        atomic_inc(&log->stripe_in_journal_count);
 }
 
 /*
@@ -2070,6 +2065,7 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
         struct stripe_head *sh, *next;
         struct mddev *mddev = log->rdev->mddev;
         struct page *page;
+        sector_t next_checkpoint = MaxSector;
 
         page = alloc_page(GFP_KERNEL);
         if (!page) {
@@ -2078,6 +2074,8 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
                 return -ENOMEM;
         }
 
+        WARN_ON(list_empty(&ctx->cached_list));
+
         list_for_each_entry_safe(sh, next, &ctx->cached_list, lru) {
                 struct r5l_meta_block *mb;
                 int i;
@@ -2123,12 +2121,15 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
                 sync_page_io(log->rdev, ctx->pos, PAGE_SIZE, page,
                              REQ_OP_WRITE, REQ_FUA, false);
                 sh->log_start = ctx->pos;
+                list_add_tail(&sh->r5c, &log->stripe_in_journal_list);
+                atomic_inc(&log->stripe_in_journal_count);
                 ctx->pos = write_pos;
                 ctx->seq += 1;
-
+                next_checkpoint = sh->log_start;
                 list_del_init(&sh->lru);
                 raid5_release_stripe(sh);
         }
+        log->next_checkpoint = next_checkpoint;
         __free_page(page);
         return 0;
 }
@@ -2139,7 +2140,6 @@ static int r5l_recovery_log(struct r5l_log *log)
         struct r5l_recovery_ctx ctx;
         int ret;
         sector_t pos;
-        struct stripe_head *sh;
 
         ctx.pos = log->last_checkpoint;
         ctx.seq = log->last_cp_seq;
@@ -2164,16 +2164,13 @@ static int r5l_recovery_log(struct r5l_log *log)
                 log->next_checkpoint = ctx.pos;
                 r5l_log_write_empty_meta_block(log, ctx.pos, ctx.seq++);
                 ctx.pos = r5l_ring_add(log, ctx.pos, BLOCK_SECTORS);
-        } else {
-                sh = list_last_entry(&ctx.cached_list, struct stripe_head, lru);
-                log->next_checkpoint = sh->log_start;
         }
 
         if ((ctx.data_only_stripes == 0) && (ctx.data_parity_stripes == 0))
                 pr_debug("md/raid:%s: starting from clean shutdown\n",
                          mdname(mddev));
         else {
-                pr_debug("md/raid:%s: recoverying %d data-only stripes and %d data-parity stripes\n",
+                pr_debug("md/raid:%s: recovering %d data-only stripes and %d data-parity stripes\n",
                          mdname(mddev), ctx.data_only_stripes,
                          ctx.data_parity_stripes);
 
@@ -2418,9 +2415,6 @@ void r5c_finish_stripe_write_out(struct r5conf *conf,
         if (do_wakeup)
                 wake_up(&conf->wait_for_overlap);
 
-        if (conf->log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_THROUGH)
-                return;
-
         spin_lock_irq(&conf->log->stripe_in_journal_lock);
         list_del_init(&sh->r5c);
         spin_unlock_irq(&conf->log->stripe_in_journal_lock);
@@ -2639,14 +2633,16 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
         spin_lock_init(&log->stripe_in_journal_lock);
         atomic_set(&log->stripe_in_journal_count, 0);
 
+        rcu_assign_pointer(conf->log, log);
+
         if (r5l_load_log(log))
                 goto error;
 
-        rcu_assign_pointer(conf->log, log);
         set_bit(MD_HAS_JOURNAL, &conf->mddev->flags);
         return 0;
 
 error:
+        rcu_assign_pointer(conf->log, NULL);
         md_unregister_thread(&log->reclaim_thread);
 reclaim_thread:
         mempool_destroy(log->meta_pool);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 06d7279bdd04..36c13e4be9c9 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -62,6 +62,8 @@
 #include "raid0.h"
 #include "bitmap.h"
 
+#define UNSUPPORTED_MDDEV_FLAGS (1L << MD_FAILFAST_SUPPORTED)
+
 #define cpu_to_group(cpu) cpu_to_node(cpu)
 #define ANY_GROUP NUMA_NO_NODE
 
@@ -7829,8 +7831,9 @@ static void *raid5_takeover_raid1(struct mddev *mddev)
         mddev->new_chunk_sectors = chunksect;
 
         ret = setup_conf(mddev);
-        if (!IS_ERR_VALUE(ret))
-                clear_bit(MD_FAILFAST_SUPPORTED, &mddev->flags);
+        if (!IS_ERR(ret))
+                mddev_clear_unsupported_flags(mddev,
+                        UNSUPPORTED_MDDEV_FLAGS);
         return ret;
 }
 
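
The IS_ERR_VALUE() to IS_ERR() switch above is the "correct IS_ERR() variation" fix from the pull list: setup_conf() hands back a pointer that may be ERR_PTR()-encoded, and IS_ERR() is the pointer-form check, while IS_ERR_VALUE() is intended for raw unsigned long values. A minimal userspace sketch of that idiom follows (the ERR_PTR()/IS_ERR() macros are re-modelled here for illustration, and setup_conf_stub() is a hypothetical stand-in, not kernel code):

#include <stdio.h>
#include <errno.h>

/* Re-sketch of the kernel's ERR_PTR()/IS_ERR() idiom, for illustration only. */
#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error)
{
        return (void *)error;
}

static inline int IS_ERR(const void *ptr)
{
        return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

/* Hypothetical stand-in for setup_conf(): a real pointer or an encoded errno. */
static void *setup_conf_stub(int fail)
{
        static int conf;
        return fail ? ERR_PTR(-ENOMEM) : &conf;
}

int main(void)
{
        void *ret = setup_conf_stub(1);

        /* Pointer-form check, as in the raid5_takeover_raid1() hunk above. */
        if (IS_ERR(ret))
                printf("setup failed, error pointer %p\n", ret);
        else
                printf("setup ok\n");
        return 0;
}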