aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
authorShaohua Li <shli@kernel.org>2012-10-10 22:49:49 -0400
committerNeilBrown <neilb@suse.de>2012-10-10 22:49:49 -0400
commit9e44476851e91c86c98eb92b9bc27fb801f89072 (patch)
treec681fe9b8261f390d60fbbcce060fe6ed958c836 /drivers/md
parent620125f2bf8ff0c4969b79653b54d7bcc9d40637 (diff)
MD: raid5 avoid unnecessary zero page for trim
We want to avoid zeroing the discarded dev page, because zeroing it is useless for a discard. But if we don't zero it, another read/write that hits that page in the stripe cache will get inconsistent data. To avoid zeroing the page, we don't set the R5_UPTODATE flag after construction is done. This way, the discard write request is still issued and completed, but a read will not hit the page. If the stripe gets accessed again soon, we need to reread the stripe, but since the chance of that is low, the reread isn't a big deal. Signed-off-by: Shaohua Li <shli@fusionio.com> Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/raid5.c35
1 files changed, 17 insertions, 18 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 74dcf19cfe68..758b77296404 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -547,7 +547,7 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
547 rw = WRITE_FUA; 547 rw = WRITE_FUA;
548 else 548 else
549 rw = WRITE; 549 rw = WRITE;
550 if (test_and_clear_bit(R5_Discard, &sh->dev[i].flags)) 550 if (test_bit(R5_Discard, &sh->dev[i].flags))
551 rw |= REQ_DISCARD; 551 rw |= REQ_DISCARD;
552 } else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags)) 552 } else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags))
553 rw = READ; 553 rw = READ;
@@ -1172,11 +1172,9 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
1172 set_bit(R5_WantFUA, &dev->flags); 1172 set_bit(R5_WantFUA, &dev->flags);
1173 if (wbi->bi_rw & REQ_SYNC) 1173 if (wbi->bi_rw & REQ_SYNC)
1174 set_bit(R5_SyncIO, &dev->flags); 1174 set_bit(R5_SyncIO, &dev->flags);
1175 if (wbi->bi_rw & REQ_DISCARD) { 1175 if (wbi->bi_rw & REQ_DISCARD)
1176 memset(page_address(dev->page), 0,
1177 STRIPE_SECTORS << 9);
1178 set_bit(R5_Discard, &dev->flags); 1176 set_bit(R5_Discard, &dev->flags);
1179 } else 1177 else
1180 tx = async_copy_data(1, wbi, dev->page, 1178 tx = async_copy_data(1, wbi, dev->page,
1181 dev->sector, tx); 1179 dev->sector, tx);
1182 wbi = r5_next_bio(wbi, dev->sector); 1180 wbi = r5_next_bio(wbi, dev->sector);
@@ -1194,7 +1192,7 @@ static void ops_complete_reconstruct(void *stripe_head_ref)
1194 int pd_idx = sh->pd_idx; 1192 int pd_idx = sh->pd_idx;
1195 int qd_idx = sh->qd_idx; 1193 int qd_idx = sh->qd_idx;
1196 int i; 1194 int i;
1197 bool fua = false, sync = false; 1195 bool fua = false, sync = false, discard = false;
1198 1196
1199 pr_debug("%s: stripe %llu\n", __func__, 1197 pr_debug("%s: stripe %llu\n", __func__,
1200 (unsigned long long)sh->sector); 1198 (unsigned long long)sh->sector);
@@ -1202,13 +1200,15 @@ static void ops_complete_reconstruct(void *stripe_head_ref)
1202 for (i = disks; i--; ) { 1200 for (i = disks; i--; ) {
1203 fua |= test_bit(R5_WantFUA, &sh->dev[i].flags); 1201 fua |= test_bit(R5_WantFUA, &sh->dev[i].flags);
1204 sync |= test_bit(R5_SyncIO, &sh->dev[i].flags); 1202 sync |= test_bit(R5_SyncIO, &sh->dev[i].flags);
1203 discard |= test_bit(R5_Discard, &sh->dev[i].flags);
1205 } 1204 }
1206 1205
1207 for (i = disks; i--; ) { 1206 for (i = disks; i--; ) {
1208 struct r5dev *dev = &sh->dev[i]; 1207 struct r5dev *dev = &sh->dev[i];
1209 1208
1210 if (dev->written || i == pd_idx || i == qd_idx) { 1209 if (dev->written || i == pd_idx || i == qd_idx) {
1211 set_bit(R5_UPTODATE, &dev->flags); 1210 if (!discard)
1211 set_bit(R5_UPTODATE, &dev->flags);
1212 if (fua) 1212 if (fua)
1213 set_bit(R5_WantFUA, &dev->flags); 1213 set_bit(R5_WantFUA, &dev->flags);
1214 if (sync) 1214 if (sync)
@@ -1252,8 +1252,6 @@ ops_run_reconstruct5(struct stripe_head *sh, struct raid5_percpu *percpu,
1252 } 1252 }
1253 if (i >= sh->disks) { 1253 if (i >= sh->disks) {
1254 atomic_inc(&sh->count); 1254 atomic_inc(&sh->count);
1255 memset(page_address(sh->dev[pd_idx].page), 0,
1256 STRIPE_SECTORS << 9);
1257 set_bit(R5_Discard, &sh->dev[pd_idx].flags); 1255 set_bit(R5_Discard, &sh->dev[pd_idx].flags);
1258 ops_complete_reconstruct(sh); 1256 ops_complete_reconstruct(sh);
1259 return; 1257 return;
@@ -1314,10 +1312,6 @@ ops_run_reconstruct6(struct stripe_head *sh, struct raid5_percpu *percpu,
1314 } 1312 }
1315 if (i >= sh->disks) { 1313 if (i >= sh->disks) {
1316 atomic_inc(&sh->count); 1314 atomic_inc(&sh->count);
1317 memset(page_address(sh->dev[sh->pd_idx].page), 0,
1318 STRIPE_SECTORS << 9);
1319 memset(page_address(sh->dev[sh->qd_idx].page), 0,
1320 STRIPE_SECTORS << 9);
1321 set_bit(R5_Discard, &sh->dev[sh->pd_idx].flags); 1315 set_bit(R5_Discard, &sh->dev[sh->pd_idx].flags);
1322 set_bit(R5_Discard, &sh->dev[sh->qd_idx].flags); 1316 set_bit(R5_Discard, &sh->dev[sh->qd_idx].flags);
1323 ops_complete_reconstruct(sh); 1317 ops_complete_reconstruct(sh);
@@ -2775,7 +2769,8 @@ static void handle_stripe_clean_event(struct r5conf *conf,
2775 if (sh->dev[i].written) { 2769 if (sh->dev[i].written) {
2776 dev = &sh->dev[i]; 2770 dev = &sh->dev[i];
2777 if (!test_bit(R5_LOCKED, &dev->flags) && 2771 if (!test_bit(R5_LOCKED, &dev->flags) &&
2778 test_bit(R5_UPTODATE, &dev->flags)) { 2772 (test_bit(R5_UPTODATE, &dev->flags) ||
2773 test_and_clear_bit(R5_Discard, &dev->flags))) {
2779 /* We can return any write requests */ 2774 /* We can return any write requests */
2780 struct bio *wbi, *wbi2; 2775 struct bio *wbi, *wbi2;
2781 pr_debug("Return write for disc %d\n", i); 2776 pr_debug("Return write for disc %d\n", i);
@@ -3493,10 +3488,12 @@ static void handle_stripe(struct stripe_head *sh)
3493 if (s.written && 3488 if (s.written &&
3494 (s.p_failed || ((test_bit(R5_Insync, &pdev->flags) 3489 (s.p_failed || ((test_bit(R5_Insync, &pdev->flags)
3495 && !test_bit(R5_LOCKED, &pdev->flags) 3490 && !test_bit(R5_LOCKED, &pdev->flags)
3496 && test_bit(R5_UPTODATE, &pdev->flags)))) && 3491 && (test_bit(R5_UPTODATE, &pdev->flags) ||
3492 test_bit(R5_Discard, &pdev->flags))))) &&
3497 (s.q_failed || ((test_bit(R5_Insync, &qdev->flags) 3493 (s.q_failed || ((test_bit(R5_Insync, &qdev->flags)
3498 && !test_bit(R5_LOCKED, &qdev->flags) 3494 && !test_bit(R5_LOCKED, &qdev->flags)
3499 && test_bit(R5_UPTODATE, &qdev->flags))))) 3495 && (test_bit(R5_UPTODATE, &qdev->flags) ||
3496 test_bit(R5_Discard, &qdev->flags))))))
3500 handle_stripe_clean_event(conf, sh, disks, &s.return_bi); 3497 handle_stripe_clean_event(conf, sh, disks, &s.return_bi);
3501 3498
3502 /* Now we might consider reading some blocks, either to check/generate 3499 /* Now we might consider reading some blocks, either to check/generate
@@ -3523,9 +3520,11 @@ static void handle_stripe(struct stripe_head *sh)
3523 /* All the 'written' buffers and the parity block are ready to 3520 /* All the 'written' buffers and the parity block are ready to
3524 * be written back to disk 3521 * be written back to disk
3525 */ 3522 */
3526 BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags)); 3523 BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags) &&
3524 !test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags));
3527 BUG_ON(sh->qd_idx >= 0 && 3525 BUG_ON(sh->qd_idx >= 0 &&
3528 !test_bit(R5_UPTODATE, &sh->dev[sh->qd_idx].flags)); 3526 !test_bit(R5_UPTODATE, &sh->dev[sh->qd_idx].flags) &&
3527 !test_bit(R5_Discard, &sh->dev[sh->qd_idx].flags));
3529 for (i = disks; i--; ) { 3528 for (i = disks; i--; ) {
3530 struct r5dev *dev = &sh->dev[i]; 3529 struct r5dev *dev = &sh->dev[i];
3531 if (test_bit(R5_LOCKED, &dev->flags) && 3530 if (test_bit(R5_LOCKED, &dev->flags) &&