author    Artur Paszkiewicz <artur.paszkiewicz@intel.com>  2017-03-09 03:59:59 -0500
committer Shaohua Li <shli@fb.com>  2017-03-16 19:55:54 -0400
commit    3418d036c81dcb604b7c7c71b209d5890a8418aa
tree      d02a31103e09f82858bf149ebcb511e12ed6065a  /drivers/md/raid5.c
parent    ff875738edd44e3bc892d378deacc50bccc9d70c
raid5-ppl: Partial Parity Log write logging implementation
Implement the calculation of partial parity for a stripe and PPL write logging functionality. The description of PPL is added to the documentation. More details can be found in the comments in raid5-ppl.c.

Attach a page for holding the partial parity data to stripe_head. Allocate it only if mddev has the MD_HAS_PPL flag set.

Partial parity is the xor of not modified data chunks of a stripe and is calculated as follows:

- reconstruct-write case: xor data from all not updated disks in a stripe
- read-modify-write case: xor old data and parity from all updated disks in a stripe

Implement it using the async_tx API and integrate it into raid_run_ops(). It must be called while we still have access to the old data, so do it when STRIPE_OP_BIODRAIN is set, but before ops_run_prexor5(). The result is stored in sh->ppl_page.

Partial parity is not meaningful for a full stripe write and is not stored in the log or used for recovery, so don't attempt to calculate it when the stripe has STRIPE_FULL_WRITE set.

Put the PPL metadata structures in md_p.h because userspace tools (mdadm) will also need to read/write PPL.

Warn about using PPL with an enabled disk volatile write-back cache for now. The warning can be removed once disk cache flushing before writing PPL is implemented.

Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Signed-off-by: Shaohua Li <shli@fb.com>
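For reference, a minimal synchronous sketch of the partial parity calculation described above. This is illustrative only and not part of the patch: the real code does the same xor through the async_tx API in ops_run_partial_parity() (raid5-ppl.c), and the helper name and open-coded byte loop here are assumptions made for clarity.

    /*
     * Illustrative sketch only: compute partial parity for one stripe_head
     * into sh->ppl_page.  Assumes it runs before ops_run_prexor5(), while
     * sh->dev[i].page still holds the old data, as the commit message
     * requires.
     */
    static void ppl_partial_parity_sketch(struct stripe_head *sh, bool rmw)
    {
            u8 *pp = page_address(sh->ppl_page);
            int i, j;

            memset(pp, 0, PAGE_SIZE);

            for (i = 0; i < sh->disks; i++) {
                    bool updated = sh->dev[i].towrite != NULL;
                    u8 *src;

                    if (rmw) {
                            /* read-modify-write: old data and parity of updated disks */
                            if (!updated && i != sh->pd_idx)
                                    continue;
                    } else {
                            /* reconstruct-write: data of disks that are not updated */
                            if (updated || i == sh->pd_idx)
                                    continue;
                    }

                    src = page_address(sh->dev[i].page);
                    for (j = 0; j < PAGE_SIZE; j++)
                            pp[j] ^= src[j];
            }
    }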
Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r--  drivers/md/raid5.c | 64
1 file changed, 61 insertions(+), 3 deletions(-)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index f575f40d2acb..6b86e0826afe 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -482,6 +482,11 @@ static void shrink_buffers(struct stripe_head *sh)
                 sh->dev[i].page = NULL;
                 put_page(p);
         }
+
+        if (sh->ppl_page) {
+                put_page(sh->ppl_page);
+                sh->ppl_page = NULL;
+        }
 }
 
 static int grow_buffers(struct stripe_head *sh, gfp_t gfp)
@@ -498,6 +503,13 @@ static int grow_buffers(struct stripe_head *sh, gfp_t gfp)
                 sh->dev[i].page = page;
                 sh->dev[i].orig_page = page;
         }
+
+        if (raid5_has_ppl(sh->raid_conf)) {
+                sh->ppl_page = alloc_page(gfp);
+                if (!sh->ppl_page)
+                        return 1;
+        }
+
         return 0;
 }
 
@@ -746,7 +758,7 @@ static bool stripe_can_batch(struct stripe_head *sh)
 {
         struct r5conf *conf = sh->raid_conf;
 
-        if (conf->log)
+        if (conf->log || raid5_has_ppl(conf))
                 return false;
         return test_bit(STRIPE_BATCH_READY, &sh->state) &&
                 !test_bit(STRIPE_BITMAP_PENDING, &sh->state) &&
@@ -2093,6 +2105,9 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
                 async_tx_ack(tx);
         }
 
+        if (test_bit(STRIPE_OP_PARTIAL_PARITY, &ops_request))
+                tx = ops_run_partial_parity(sh, percpu, tx);
+
         if (test_bit(STRIPE_OP_PREXOR, &ops_request)) {
                 if (level < 6)
                         tx = ops_run_prexor5(sh, percpu, tx);
@@ -3168,6 +3183,12 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
                 s->locked++;
         }
 
+        if (raid5_has_ppl(sh->raid_conf) &&
+            test_bit(STRIPE_OP_BIODRAIN, &s->ops_request) &&
+            !test_bit(STRIPE_FULL_WRITE, &sh->state) &&
+            test_bit(R5_Insync, &sh->dev[pd_idx].flags))
+                set_bit(STRIPE_OP_PARTIAL_PARITY, &s->ops_request);
+
         pr_debug("%s: stripe %llu locked: %d ops_request: %lx\n",
                 __func__, (unsigned long long)sh->sector,
                 s->locked, s->ops_request);
@@ -3215,6 +3236,36 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx,
         if (*bip && (*bip)->bi_iter.bi_sector < bio_end_sector(bi))
                 goto overlap;
 
+        if (forwrite && raid5_has_ppl(conf)) {
+                /*
+                 * With PPL only writes to consecutive data chunks within a
+                 * stripe are allowed because for a single stripe_head we can
+                 * only have one PPL entry at a time, which describes one data
+                 * range. Not really an overlap, but wait_for_overlap can be
+                 * used to handle this.
+                 */
+                sector_t sector;
+                sector_t first = 0;
+                sector_t last = 0;
+                int count = 0;
+                int i;
+
+                for (i = 0; i < sh->disks; i++) {
+                        if (i != sh->pd_idx &&
+                            (i == dd_idx || sh->dev[i].towrite)) {
+                                sector = sh->dev[i].sector;
+                                if (count == 0 || sector < first)
+                                        first = sector;
+                                if (sector > last)
+                                        last = sector;
+                                count++;
+                        }
+                }
+
+                if (first + conf->chunk_sectors * (count - 1) != last)
+                        goto overlap;
+        }
+
         if (!forwrite || previous)
                 clear_bit(STRIPE_BATCH_READY, &sh->state);
 
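The final check in the new block above can be read as "the written data chunks must form one contiguous run". As a worked example, assuming for illustration a chunk size of 1024 sectors: writes covering data chunks whose dev sectors are 0, 1024 and 2048 give first = 0, last = 2048 and count = 3, and 0 + 1024 * (3 - 1) == 2048, so the bio is accepted. Writes covering only the chunks at 0 and 2048 give 0 + 1024 * (2 - 1) = 1024 != 2048, so the bio takes the overlap path and waits rather than requiring a second PPL range for the same stripe_head.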
@@ -7208,6 +7259,13 @@ static int raid5_run(struct mddev *mddev)
                 BUG_ON(mddev->delta_disks != 0);
         }
 
+        if (test_bit(MD_HAS_JOURNAL, &mddev->flags) &&
+            test_bit(MD_HAS_PPL, &mddev->flags)) {
+                pr_warn("md/raid:%s: using journal device and PPL not allowed - disabling PPL\n",
+                        mdname(mddev));
+                clear_bit(MD_HAS_PPL, &mddev->flags);
+        }
+
         if (mddev->private == NULL)
                 conf = setup_conf(mddev);
         else
@@ -7689,7 +7747,7 @@ static int raid5_resize(struct mddev *mddev, sector_t sectors)
         sector_t newsize;
         struct r5conf *conf = mddev->private;
 
-        if (conf->log)
+        if (conf->log || raid5_has_ppl(conf))
                 return -EINVAL;
         sectors &= ~((sector_t)conf->chunk_sectors - 1);
         newsize = raid5_size(mddev, sectors, mddev->raid_disks);
@@ -7740,7 +7798,7 @@ static int check_reshape(struct mddev *mddev)
 {
         struct r5conf *conf = mddev->private;
 
-        if (conf->log)
+        if (conf->log || raid5_has_ppl(conf))
                 return -EINVAL;
         if (mddev->delta_disks == 0 &&
             mddev->new_layout == mddev->layout &&