author    Linus Torvalds <torvalds@linux-foundation.org>  2012-10-13 16:22:01 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2012-10-13 16:22:01 -0400
commit    9db908806b85c1430150fbafe269a7b21b07d15d
tree      3911759c93e0be26b6771e1a92b75612b206ffa5  /drivers/md/raid5.c
parent    4d7127dace8cf4b05eb7c8c8531fc204fbb195f4
parent    72f36d5972a166197036c1281963f6863c429bf2
Merge tag 'md-3.7' of git://neil.brown.name/md
Pull md updates from NeilBrown:
- "discard" support, some dm-raid improvements and other assorted bits
and pieces.
* tag 'md-3.7' of git://neil.brown.name/md: (29 commits)
md: refine reporting of resync/reshape delays.
md/raid5: be careful not to resize_stripes too big.
md: make sure manual changes to recovery checkpoint are saved.
md/raid10: use correct limit variable
md: writing to sync_action should clear the read-auto state.
md: change resync_mismatches to atomic64_t to avoid races
md/raid5: make sure to_read and to_write never go negative.
md: When RAID5 is dirty, force reconstruct-write instead of read-modify-write.
md/raid5: protect debug message against NULL dereference.
md/raid5: add some missing locking in handle_failed_stripe.
MD: raid5 avoid unnecessary zero page for trim
MD: raid5 trim support
md/bitmap: Don't use IS_ERR to judge alloc_page().
md/raid1: Don't release reference to device while handling read error.
raid: replace list_for_each_continue_rcu with new interface
add further __init annotations to crypto/xor.c
DM RAID: Fix for "sync" directive ineffectiveness
DM RAID: Fix comparison of index and quantity for "rebuild" parameter
DM RAID: Add rebuild capability for RAID10
DM RAID: Move 'rebuild' checking code to its own function
...
Diffstat (limited to 'drivers/md/raid5.c')
 drivers/md/raid5.c | 219 +++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 197 insertions(+), 22 deletions(-)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 0689173fd9f5..c5439dce0295 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -551,6 +551,8 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
 				rw = WRITE_FUA;
 			else
 				rw = WRITE;
+			if (test_bit(R5_Discard, &sh->dev[i].flags))
+				rw |= REQ_DISCARD;
 		} else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags))
 			rw = READ;
 		else if (test_and_clear_bit(R5_WantReplace,
@@ -1174,8 +1176,11 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
 				set_bit(R5_WantFUA, &dev->flags);
 			if (wbi->bi_rw & REQ_SYNC)
 				set_bit(R5_SyncIO, &dev->flags);
-			tx = async_copy_data(1, wbi, dev->page,
-				dev->sector, tx);
+			if (wbi->bi_rw & REQ_DISCARD)
+				set_bit(R5_Discard, &dev->flags);
+			else
+				tx = async_copy_data(1, wbi, dev->page,
+					dev->sector, tx);
 			wbi = r5_next_bio(wbi, dev->sector);
 		}
 	}
@@ -1191,7 +1196,7 @@ static void ops_complete_reconstruct(void *stripe_head_ref)
 	int pd_idx = sh->pd_idx;
 	int qd_idx = sh->qd_idx;
 	int i;
-	bool fua = false, sync = false;
+	bool fua = false, sync = false, discard = false;
 
 	pr_debug("%s: stripe %llu\n", __func__,
 		(unsigned long long)sh->sector);
@@ -1199,13 +1204,15 @@ static void ops_complete_reconstruct(void *stripe_head_ref)
 	for (i = disks; i--; ) {
 		fua |= test_bit(R5_WantFUA, &sh->dev[i].flags);
 		sync |= test_bit(R5_SyncIO, &sh->dev[i].flags);
+		discard |= test_bit(R5_Discard, &sh->dev[i].flags);
 	}
 
 	for (i = disks; i--; ) {
 		struct r5dev *dev = &sh->dev[i];
 
 		if (dev->written || i == pd_idx || i == qd_idx) {
-			set_bit(R5_UPTODATE, &dev->flags);
+			if (!discard)
+				set_bit(R5_UPTODATE, &dev->flags);
 			if (fua)
 				set_bit(R5_WantFUA, &dev->flags);
 			if (sync)
@@ -1241,6 +1248,18 @@ ops_run_reconstruct5(struct stripe_head *sh, struct raid5_percpu *percpu,
 	pr_debug("%s: stripe %llu\n", __func__,
 		(unsigned long long)sh->sector);
 
+	for (i = 0; i < sh->disks; i++) {
+		if (pd_idx == i)
+			continue;
+		if (!test_bit(R5_Discard, &sh->dev[i].flags))
+			break;
+	}
+	if (i >= sh->disks) {
+		atomic_inc(&sh->count);
+		set_bit(R5_Discard, &sh->dev[pd_idx].flags);
+		ops_complete_reconstruct(sh);
+		return;
+	}
 	/* check if prexor is active which means only process blocks
 	 * that are part of a read-modify-write (written)
 	 */
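
Editor's note: this hunk adds a fast path to ops_run_reconstruct5(): when every data block in the stripe carries R5_Discard there is no payload to XOR, so parity computation is skipped and the parity block is simply marked for discard as well (the next hunk does the same for both parity blocks in the RAID6 path). Below is a minimal userspace sketch of that scan; the types and names are simplified stand-ins, not the real md structures.

    /* all-discard short-circuit, condensed from the hunk above */
    #include <stdbool.h>
    #include <stdio.h>

    #define NDISKS 5

    struct fake_dev { bool discard; };       /* stands in for r5dev + R5_Discard */
    struct fake_stripe {
        int pd_idx;                          /* parity disk index */
        struct fake_dev dev[NDISKS];
    };

    /* true when every data (non-parity) block is a discard */
    static bool all_data_blocks_discarded(const struct fake_stripe *sh)
    {
        for (int i = 0; i < NDISKS; i++) {
            if (i == sh->pd_idx)
                continue;                    /* parity block doesn't count */
            if (!sh->dev[i].discard)
                return false;                /* one real write => must XOR */
        }
        return true;
    }

    int main(void)
    {
        struct fake_stripe sh = { .pd_idx = 4 };
        for (int i = 0; i < NDISKS; i++)
            sh.dev[i].discard = (i != sh.pd_idx);

        if (all_data_blocks_discarded(&sh)) {
            sh.dev[sh.pd_idx].discard = true;  /* discard parity too */
            puts("whole stripe discarded: skip XOR, discard parity too");
        }
        return 0;
    }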
@@ -1285,10 +1304,24 @@ ops_run_reconstruct6(struct stripe_head *sh, struct raid5_percpu *percpu,
 {
 	struct async_submit_ctl submit;
 	struct page **blocks = percpu->scribble;
-	int count;
+	int count, i;
 
 	pr_debug("%s: stripe %llu\n", __func__, (unsigned long long)sh->sector);
 
+	for (i = 0; i < sh->disks; i++) {
+		if (sh->pd_idx == i || sh->qd_idx == i)
+			continue;
+		if (!test_bit(R5_Discard, &sh->dev[i].flags))
+			break;
+	}
+	if (i >= sh->disks) {
+		atomic_inc(&sh->count);
+		set_bit(R5_Discard, &sh->dev[sh->pd_idx].flags);
+		set_bit(R5_Discard, &sh->dev[sh->qd_idx].flags);
+		ops_complete_reconstruct(sh);
+		return;
+	}
+
 	count = set_syndrome_sources(blocks, sh);
 
 	atomic_inc(&sh->count);
@@ -2408,11 +2441,11 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
 		if (sector >= sh->dev[dd_idx].sector + STRIPE_SECTORS)
 			set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags);
 	}
-	spin_unlock_irq(&sh->stripe_lock);
 
 	pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n",
 		(unsigned long long)(*bip)->bi_sector,
 		(unsigned long long)sh->sector, dd_idx);
+	spin_unlock_irq(&sh->stripe_lock);
 
 	if (conf->mddev->bitmap && firstwrite) {
 		bitmap_startwrite(conf->mddev->bitmap, sh->sector,
@@ -2479,10 +2512,8 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
 		bi = sh->dev[i].towrite;
 		sh->dev[i].towrite = NULL;
 		spin_unlock_irq(&sh->stripe_lock);
-		if (bi) {
-			s->to_write--;
+		if (bi)
 			bitmap_end = 1;
-		}
 
 		if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
 			wake_up(&conf->wait_for_overlap);
@@ -2524,11 +2555,12 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
 		if (!test_bit(R5_Wantfill, &sh->dev[i].flags) &&
 		    (!test_bit(R5_Insync, &sh->dev[i].flags) ||
 		      test_bit(R5_ReadError, &sh->dev[i].flags))) {
+			spin_lock_irq(&sh->stripe_lock);
 			bi = sh->dev[i].toread;
 			sh->dev[i].toread = NULL;
+			spin_unlock_irq(&sh->stripe_lock);
 			if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
 				wake_up(&conf->wait_for_overlap);
-			if (bi) s->to_read--;
 			while (bi && bi->bi_sector <
 			       sh->dev[i].sector + STRIPE_SECTORS) {
 				struct bio *nextbi =
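
Editor's note: this hunk (like the towrite hunk above it) uses a detach-then-walk pattern: the pending bio list is unhooked from the stripe under stripe_lock, and only the now-private chain is traversed afterwards. A rough userspace analogue follows, with a pthread mutex standing in for the kernel spinlock and a hypothetical "struct req" in place of struct bio. Build with: cc -pthread detach.c

    #include <pthread.h>
    #include <stddef.h>
    #include <stdio.h>

    struct req { struct req *next; int id; };

    static pthread_mutex_t stripe_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct req *toread;               /* shared pending-read list */

    static void fail_pending_reads(void)
    {
        pthread_mutex_lock(&stripe_lock);    /* kernel: spin_lock_irq()   */
        struct req *r = toread;              /* detach the whole chain    */
        toread = NULL;
        pthread_mutex_unlock(&stripe_lock);  /* kernel: spin_unlock_irq() */

        while (r) {                          /* safe: chain is now private */
            struct req *next = r->next;
            printf("failing read %d\n", r->id);
            r = next;
        }
    }

    int main(void)
    {
        static struct req r2 = { NULL, 2 };
        static struct req r1 = { &r2, 1 };
        toread = &r1;
        fail_pending_reads();
        return 0;
    }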
@@ -2741,7 +2773,8 @@ static void handle_stripe_clean_event(struct r5conf *conf,
 		if (sh->dev[i].written) {
 			dev = &sh->dev[i];
 			if (!test_bit(R5_LOCKED, &dev->flags) &&
-			    test_bit(R5_UPTODATE, &dev->flags)) {
+			    (test_bit(R5_UPTODATE, &dev->flags) ||
+			     test_and_clear_bit(R5_Discard, &dev->flags))) {
 				/* We can return any write requests */
 				struct bio *wbi, *wbi2;
 				pr_debug("Return write for disc %d\n", i);
@@ -2775,12 +2808,25 @@ static void handle_stripe_dirtying(struct r5conf *conf,
 			   int disks)
 {
 	int rmw = 0, rcw = 0, i;
-	if (conf->max_degraded == 2) {
-		/* RAID6 requires 'rcw' in current implementation
-		 * Calculate the real rcw later - for now fake it
+	sector_t recovery_cp = conf->mddev->recovery_cp;
+
+	/* RAID6 requires 'rcw' in current implementation.
+	 * Otherwise, check whether resync is now happening or should start.
+	 * If yes, then the array is dirty (after unclean shutdown or
+	 * initial creation), so parity in some stripes might be inconsistent.
+	 * In this case, we need to always do reconstruct-write, to ensure
+	 * that in case of drive failure or read-error correction, we
+	 * generate correct data from the parity.
+	 */
+	if (conf->max_degraded == 2 ||
+	    (recovery_cp < MaxSector && sh->sector >= recovery_cp)) {
+		/* Calculate the real rcw later - for now make it
 		 * look like rcw is cheaper
 		 */
 		rcw = 1; rmw = 2;
+		pr_debug("force RCW max_degraded=%u, recovery_cp=%llu sh->sector=%llu\n",
+			 conf->max_degraded, (unsigned long long)recovery_cp,
+			 (unsigned long long)sh->sector);
 	} else for (i = disks; i--; ) {
 		/* would I have to read this buffer for read_modify_write */
 		struct r5dev *dev = &sh->dev[i];
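
Editor's note: read as a predicate, the new test forces reconstruct-write in two cases: RAID6 (which only implements RCW here), or a stripe at or beyond the resync checkpoint, whose parity may still be inconsistent after an unclean shutdown or initial creation. A standalone sketch of that decision, with simplified names (MAX_SECTOR stands in for the kernel's MaxSector):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define MAX_SECTOR UINT64_MAX            /* "no resync pending" marker */

    static bool must_force_rcw(int max_degraded, uint64_t recovery_cp,
                               uint64_t stripe_sector)
    {
        if (max_degraded == 2)               /* RAID6 only implements RCW */
            return true;
        /* resync still pending and this stripe not yet verified */
        return recovery_cp < MAX_SECTOR && stripe_sector >= recovery_cp;
    }

    int main(void)
    {
        /* array resynced up to sector 1000; stripe at 4096 is still suspect */
        printf("%d\n", must_force_rcw(1, 1000, 4096));  /* 1: force RCW   */
        printf("%d\n", must_force_rcw(1, 1000, 512));   /* 0: rmw allowed */
        return 0;
    }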
@@ -2932,7 +2978,7 @@ static void handle_parity_checks5(struct r5conf *conf, struct stripe_head *sh,
 			 */
 			set_bit(STRIPE_INSYNC, &sh->state);
 		else {
-			conf->mddev->resync_mismatches += STRIPE_SECTORS;
+			atomic64_add(STRIPE_SECTORS, &conf->mddev->resync_mismatches);
 			if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
 				/* don't try to repair!! */
 				set_bit(STRIPE_INSYNC, &sh->state);
@@ -3084,7 +3130,7 @@ static void handle_parity_checks6(struct r5conf *conf, struct stripe_head *sh,
 			 */
 		}
 	} else {
-		conf->mddev->resync_mismatches += STRIPE_SECTORS;
+		atomic64_add(STRIPE_SECTORS, &conf->mddev->resync_mismatches);
 		if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
 			/* don't try to repair!! */
 			set_bit(STRIPE_INSYNC, &sh->state);
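
Editor's note: both parity-check hunks replace a plain `+=` on resync_mismatches, a read-modify-write that can lose updates when several threads race on it, with an atomic add (per the "change resync_mismatches to atomic64_t" commit above). A userspace C11 sketch of the same idea; the kernel uses atomic64_t and atomic64_add(), not <stdatomic.h>:

    #include <inttypes.h>
    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    #define STRIPE_SECTORS 8                 /* one 4KiB stripe page in sectors */

    static atomic_uint_fast64_t resync_mismatches;

    static void note_mismatch(void)
    {
        /* one indivisible add; no update can be lost between threads */
        atomic_fetch_add(&resync_mismatches, STRIPE_SECTORS);
    }

    int main(void)
    {
        for (int i = 0; i < 3; i++)
            note_mismatch();
        printf("%" PRIuFAST64 " mismatched sectors\n",
               atomic_load(&resync_mismatches));
        return 0;
    }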
@@ -3459,10 +3505,12 @@ static void handle_stripe(struct stripe_head *sh)
 	if (s.written &&
 	    (s.p_failed || ((test_bit(R5_Insync, &pdev->flags)
 			     && !test_bit(R5_LOCKED, &pdev->flags)
-			     && test_bit(R5_UPTODATE, &pdev->flags)))) &&
+			     && (test_bit(R5_UPTODATE, &pdev->flags) ||
+				 test_bit(R5_Discard, &pdev->flags))))) &&
 	    (s.q_failed || ((test_bit(R5_Insync, &qdev->flags)
 			     && !test_bit(R5_LOCKED, &qdev->flags)
-			     && test_bit(R5_UPTODATE, &qdev->flags)))))
+			     && (test_bit(R5_UPTODATE, &qdev->flags) ||
+				 test_bit(R5_Discard, &qdev->flags))))))
 		handle_stripe_clean_event(conf, sh, disks, &s.return_bi);
 
 	/* Now we might consider reading some blocks, either to check/generate
@@ -3489,9 +3537,11 @@ static void handle_stripe(struct stripe_head *sh)
 		/* All the 'written' buffers and the parity block are ready to
 		 * be written back to disk
 		 */
-		BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags));
+		BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags) &&
+		       !test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags));
 		BUG_ON(sh->qd_idx >= 0 &&
-		       !test_bit(R5_UPTODATE, &sh->dev[sh->qd_idx].flags));
+		       !test_bit(R5_UPTODATE, &sh->dev[sh->qd_idx].flags) &&
+		       !test_bit(R5_Discard, &sh->dev[sh->qd_idx].flags));
 		for (i = disks; i--; ) {
 			struct r5dev *dev = &sh->dev[i];
 			if (test_bit(R5_LOCKED, &dev->flags) &&
@@ -4072,6 +4122,88 @@ static void release_stripe_plug(struct mddev *mddev,
 	release_stripe(sh);
 }
 
+static void make_discard_request(struct mddev *mddev, struct bio *bi)
+{
+	struct r5conf *conf = mddev->private;
+	sector_t logical_sector, last_sector;
+	struct stripe_head *sh;
+	int remaining;
+	int stripe_sectors;
+
+	if (mddev->reshape_position != MaxSector)
+		/* Skip discard while reshape is happening */
+		return;
+
+	logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
+	last_sector = bi->bi_sector + (bi->bi_size>>9);
+
+	bi->bi_next = NULL;
+	bi->bi_phys_segments = 1; /* over-loaded to count active stripes */
+
+	stripe_sectors = conf->chunk_sectors *
+		(conf->raid_disks - conf->max_degraded);
+	logical_sector = DIV_ROUND_UP_SECTOR_T(logical_sector,
+					       stripe_sectors);
+	sector_div(last_sector, stripe_sectors);
+
+	logical_sector *= conf->chunk_sectors;
+	last_sector *= conf->chunk_sectors;
+
+	for (; logical_sector < last_sector;
+	     logical_sector += STRIPE_SECTORS) {
+		DEFINE_WAIT(w);
+		int d;
+	again:
+		sh = get_active_stripe(conf, logical_sector, 0, 0, 0);
+		prepare_to_wait(&conf->wait_for_overlap, &w,
+				TASK_UNINTERRUPTIBLE);
+		spin_lock_irq(&sh->stripe_lock);
+		for (d = 0; d < conf->raid_disks; d++) {
+			if (d == sh->pd_idx || d == sh->qd_idx)
+				continue;
+			if (sh->dev[d].towrite || sh->dev[d].toread) {
+				set_bit(R5_Overlap, &sh->dev[d].flags);
+				spin_unlock_irq(&sh->stripe_lock);
+				release_stripe(sh);
+				schedule();
+				goto again;
+			}
+		}
+		finish_wait(&conf->wait_for_overlap, &w);
+		for (d = 0; d < conf->raid_disks; d++) {
+			if (d == sh->pd_idx || d == sh->qd_idx)
+				continue;
+			sh->dev[d].towrite = bi;
+			set_bit(R5_OVERWRITE, &sh->dev[d].flags);
+			raid5_inc_bi_active_stripes(bi);
+		}
+		spin_unlock_irq(&sh->stripe_lock);
+		if (conf->mddev->bitmap) {
+			for (d = 0;
+			     d < conf->raid_disks - conf->max_degraded;
+			     d++)
+				bitmap_startwrite(mddev->bitmap,
+						  sh->sector,
+						  STRIPE_SECTORS,
+						  0);
+			sh->bm_seq = conf->seq_flush + 1;
+			set_bit(STRIPE_BIT_DELAY, &sh->state);
+		}
+
+		set_bit(STRIPE_HANDLE, &sh->state);
+		clear_bit(STRIPE_DELAYED, &sh->state);
+		if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
+			atomic_inc(&conf->preread_active_stripes);
+		release_stripe_plug(mddev, sh);
+	}
+
+	remaining = raid5_dec_bi_active_stripes(bi);
+	if (remaining == 0) {
+		md_write_end(mddev);
+		bio_endio(bi, 0);
+	}
+}
+
 static void make_request(struct mddev *mddev, struct bio * bi)
 {
 	struct r5conf *conf = mddev->private;
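
Editor's note: make_discard_request() first shrinks the discard to whole-stripe granularity: the start sector is rounded up and the end sector rounded down to multiples of stripe_sectors (data disks times chunk_sectors), so any unaligned head or tail of the request is simply ignored. A standalone sketch of that clamping with an illustrative geometry follows; in the patch the rounded stripe indices are then scaled by chunk_sectors into per-device sectors before iterating.

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t chunk_sectors = 128;        /* 64KiB chunks (example)        */
        uint64_t data_disks = 3;             /* raid_disks - max_degraded     */
        uint64_t stripe_sectors = chunk_sectors * data_disks;   /* 384 */

        uint64_t start = 500, end = 2000;    /* requested discard range       */

        uint64_t first = (start + stripe_sectors - 1) / stripe_sectors; /* up   */
        uint64_t last = end / stripe_sectors;                           /* down */

        if (first < last)
            printf("discard sectors [%" PRIu64 ", %" PRIu64 ") "
                   "= full stripes %" PRIu64 "..%" PRIu64 "\n",
                   first * stripe_sectors, last * stripe_sectors,
                   first, last - 1);
        else
            puts("request too small: no whole stripe, nothing discarded");
        return 0;
    }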
@@ -4094,6 +4226,11 @@ static void make_request(struct mddev *mddev, struct bio * bi)
 		chunk_aligned_read(mddev,bi))
 		return;
 
+	if (unlikely(bi->bi_rw & REQ_DISCARD)) {
+		make_discard_request(mddev, bi);
+		return;
+	}
+
 	logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
 	last_sector = bi->bi_sector + (bi->bi_size>>9);
 	bi->bi_next = NULL;
@@ -4630,8 +4767,9 @@ static int handle_active_stripes(struct r5conf *conf)
  * During the scan, completed stripes are saved for us by the interrupt
  * handler, so that they will not have to wait for our next wakeup.
  */
-static void raid5d(struct mddev *mddev)
+static void raid5d(struct md_thread *thread)
 {
+	struct mddev *mddev = thread->mddev;
 	struct r5conf *conf = mddev->private;
 	int handled;
 	struct blk_plug plug;
@@ -5366,6 +5504,7 @@ static int run(struct mddev *mddev)
 
 	if (mddev->queue) {
 		int chunk_size;
+		bool discard_supported = true;
 		/* read-ahead size must cover two whole stripes, which
 		 * is 2 * (datadisks) * chunksize where 'n' is the
 		 * number of raid devices
@@ -5385,13 +5524,48 @@ static int run(struct mddev *mddev)
 		blk_queue_io_min(mddev->queue, chunk_size);
 		blk_queue_io_opt(mddev->queue, chunk_size *
 				 (conf->raid_disks - conf->max_degraded));
+		/*
+		 * We can only discard a whole stripe. It doesn't make sense to
+		 * discard data disk but write parity disk
+		 */
+		stripe = stripe * PAGE_SIZE;
+		mddev->queue->limits.discard_alignment = stripe;
+		mddev->queue->limits.discard_granularity = stripe;
+		/*
+		 * unaligned part of discard request will be ignored, so can't
+		 * guarantee discard_zeroes_data
+		 */
+		mddev->queue->limits.discard_zeroes_data = 0;
 
 		rdev_for_each(rdev, mddev) {
 			disk_stack_limits(mddev->gendisk, rdev->bdev,
 					  rdev->data_offset << 9);
 			disk_stack_limits(mddev->gendisk, rdev->bdev,
 					  rdev->new_data_offset << 9);
+			/*
+			 * discard_zeroes_data is required, otherwise data
+			 * could be lost. Consider a scenario: discard a stripe
+			 * (the stripe could be inconsistent if
+			 * discard_zeroes_data is 0); write one disk of the
+			 * stripe (the stripe could be inconsistent again
+			 * depending on which disks are used to calculate
+			 * parity); the disk is broken; The stripe data of this
+			 * disk is lost.
+			 */
+			if (!blk_queue_discard(bdev_get_queue(rdev->bdev)) ||
+			    !bdev_get_queue(rdev->bdev)->
+				limits.discard_zeroes_data)
+				discard_supported = false;
 		}
+
+		if (discard_supported &&
+		    mddev->queue->limits.max_discard_sectors >= stripe &&
+		    mddev->queue->limits.discard_granularity >= stripe)
+			queue_flag_set_unlocked(QUEUE_FLAG_DISCARD,
+						mddev->queue);
+		else
+			queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD,
+						  mddev->queue);
 	}
 
 	return 0;
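
Editor's note: the run() changes advertise discard only under strict conditions: discard_granularity and discard_alignment are set to a full data stripe, and QUEUE_FLAG_DISCARD is set only if every member device both supports discard and reports discard_zeroes_data, since a member that leaves stale data behind could make the stripe's parity inconsistent. A compact sketch of that gating logic, with made-up geometry and simplified member flags:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    struct member { bool discard; bool zeroes_data; };

    int main(void)
    {
        struct member rdevs[] = {
            { true, true }, { true, true }, { true, true }, { true, true },
        };
        int ndisks = 4, max_degraded = 1;
        uint64_t chunk_bytes = 512 * 1024;   /* 512KiB chunks (example) */
        /* granularity = one full stripe of data */
        uint64_t stripe_bytes = chunk_bytes * (ndisks - max_degraded);

        bool supported = true;
        for (int i = 0; i < ndisks; i++)
            if (!rdevs[i].discard || !rdevs[i].zeroes_data)
                supported = false;           /* one bad member disables it */

        printf("discard granularity: %llu bytes, %s\n",
               (unsigned long long)stripe_bytes,
               supported ? "enabled" : "disabled");
        return 0;
    }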
@@ -5702,7 +5876,8 @@ static int check_reshape(struct mddev *mddev)
 	if (!check_stripe_cache(mddev))
 		return -ENOSPC;
 
-	return resize_stripes(conf, conf->raid_disks + mddev->delta_disks);
+	return resize_stripes(conf, (conf->previous_raid_disks
+				     + mddev->delta_disks));
 }
 
 static int raid5_start_reshape(struct mddev *mddev)