author	Linus Torvalds <torvalds@linux-foundation.org>	2017-09-07 15:41:48 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2017-09-07 15:41:48 -0400
commit	3645e6d0dc80be4376f87acc9ee527768387c909 (patch)
tree	78cc68cb09c9d24a6bcfaf842a3bca671ed53ee0
parent	15d8ffc96464f6571ecf22043c45fad659f11bdd (diff)
parent	e8a27f836f165c26f867ece7f31eb5c811692319 (diff)
Merge tag 'md/4.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md
Pull MD updates from Shaohua Li:
 "This update mainly fixes bugs:

  - Make raid5 ppl support several ppls from Pawel

  - Several raid5-cache bug fixes from Song

  - Bitmap fixes from Neil and Me

  - One raid1/10 regression fix since 4.12 from Me

  - Other small fixes and cleanup"

* tag 'md/4.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md:
  md/bitmap: disable bitmap_resize for file-backed bitmaps.
  raid5-ppl: Recovery support for multiple partial parity logs
  md: Runtime support for multiple ppls
  md/raid0: attach correct cgroup info in bio
  lib/raid6: align AVX512 constants to 512 bits, not bytes
  raid5: remove raid5_build_block
  md/r5cache: call mddev_lock/unlock() in r5c_journal_mode_show
  md: replace seq_release_private with seq_release
  md: notify about new spare disk in the container
  md/raid1/10: reset bio allocated from mempool
  md/raid5: release/flush io in raid5_do_work()
  md/bitmap: copy correct data for bitmap super
-rw-r--r--	block/bio.c			  2
-rw-r--r--	drivers/md/bitmap.c		  9
-rw-r--r--	drivers/md/md.c			 20
-rw-r--r--	drivers/md/md.h			  1
-rw-r--r--	drivers/md/raid0.c		  4
-rw-r--r--	drivers/md/raid1.c		 22
-rw-r--r--	drivers/md/raid10.c		 35
-rw-r--r--	drivers/md/raid5-cache.c	 12
-rw-r--r--	drivers/md/raid5-ppl.c		171
-rw-r--r--	drivers/md/raid5.c		 16
-rw-r--r--	include/uapi/linux/raid/md_p.h	  4
-rw-r--r--	lib/raid6/avx512.c		  2
12 files changed, 230 insertions(+), 68 deletions(-)
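A note on the on-disk format change threaded through the md.c and md_p.h hunks below: the new MD_FEATURE_MULTIPLE_PPLS superblock bit is mutually exclusive with MD_FEATURE_PPL, and either bit now selects PPL handling when loading a v1.x superblock. A minimal standalone sketch of that validation rule (the constants are from md_p.h; the helper name is ours, not the kernel's):

#include <stdint.h>

#define MD_FEATURE_PPL           1024	/* from include/uapi/linux/raid/md_p.h */
#define MD_FEATURE_MULTIPLE_PPLS 2048	/* added by this merge */

/* hypothetical helper mirroring the super_1_validate() check below:
 * at most one of the two PPL flavors may be set */
static int ppl_features_valid(uint32_t feature_map)
{
	uint32_t ppl = feature_map &
		       (MD_FEATURE_PPL | MD_FEATURE_MULTIPLE_PPLS);

	return ppl != (MD_FEATURE_PPL | MD_FEATURE_MULTIPLE_PPLS);
}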
diff --git a/block/bio.c b/block/bio.c
index 6745759028da..b38e962fa83e 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -2089,7 +2089,7 @@ void bio_clone_blkcg_association(struct bio *dst, struct bio *src)
 	if (src->bi_css)
 		WARN_ON(bio_associate_blkcg(dst, src->bi_css));
 }
-
+EXPORT_SYMBOL_GPL(bio_clone_blkcg_association);
 #endif /* CONFIG_BLK_CGROUP */
 
 static void __init biovec_init_slabs(void)
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 40f3cd7eab0f..d2121637b4ab 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -625,7 +625,7 @@ re_read:
 		err = read_sb_page(bitmap->mddev,
 				   offset,
 				   sb_page,
-				   0, sizeof(bitmap_super_t));
+				   0, PAGE_SIZE);
 	}
 	if (err)
 		return err;
@@ -2058,6 +2058,11 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks,
 	long pages;
 	struct bitmap_page *new_bp;
 
+	if (bitmap->storage.file && !init) {
+		pr_info("md: cannot resize file-based bitmap\n");
+		return -EINVAL;
+	}
+
 	if (chunksize == 0) {
 		/* If there is enough space, leave the chunk size unchanged,
 		 * else increase by factor of two until there is enough space.
@@ -2118,7 +2123,7 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks,
 	if (store.sb_page && bitmap->storage.sb_page)
 		memcpy(page_address(store.sb_page),
 		       page_address(bitmap->storage.sb_page),
-		       sizeof(bitmap_super_t));
+		       PAGE_SIZE);
 	bitmap_file_unmap(&bitmap->storage);
 	bitmap->storage = store;
 
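Both bitmap.c changes are visible above: the superblock read and the resize-time copy now cover the full PAGE_SIZE rather than sizeof(bitmap_super_t) (the fix indicates the superblock page carries meaningful data past the struct proper, which a struct-sized copy silently dropped), and bitmap_resize() now rejects file-backed bitmaps outside of initialization, returning -EINVAL after logging "md: cannot resize file-based bitmap".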
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 078c6f3b7e53..08fcaebc61bd 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1538,7 +1538,8 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
 	} else if (sb->bblog_offset != 0)
 		rdev->badblocks.shift = 0;
 
-	if (le32_to_cpu(sb->feature_map) & MD_FEATURE_PPL) {
+	if ((le32_to_cpu(sb->feature_map) &
+	    (MD_FEATURE_PPL | MD_FEATURE_MULTIPLE_PPLS))) {
 		rdev->ppl.offset = (__s16)le16_to_cpu(sb->ppl.offset);
 		rdev->ppl.size = le16_to_cpu(sb->ppl.size);
 		rdev->ppl.sector = rdev->sb_start + rdev->ppl.offset;
@@ -1657,10 +1658,15 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
 		if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)
 			set_bit(MD_HAS_JOURNAL, &mddev->flags);
 
-		if (le32_to_cpu(sb->feature_map) & MD_FEATURE_PPL) {
+		if (le32_to_cpu(sb->feature_map) &
+		    (MD_FEATURE_PPL | MD_FEATURE_MULTIPLE_PPLS)) {
 			if (le32_to_cpu(sb->feature_map) &
 			    (MD_FEATURE_BITMAP_OFFSET | MD_FEATURE_JOURNAL))
 				return -EINVAL;
+			if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_PPL) &&
+			    (le32_to_cpu(sb->feature_map) &
+			     MD_FEATURE_MULTIPLE_PPLS))
+				return -EINVAL;
 			set_bit(MD_HAS_PPL, &mddev->flags);
 		}
 	} else if (mddev->pers == NULL) {
@@ -1877,7 +1883,11 @@ retry:
 		sb->feature_map |= cpu_to_le32(MD_FEATURE_JOURNAL);
 
 	if (test_bit(MD_HAS_PPL, &mddev->flags)) {
-		sb->feature_map |= cpu_to_le32(MD_FEATURE_PPL);
+		if (test_bit(MD_HAS_MULTIPLE_PPLS, &mddev->flags))
+			sb->feature_map |=
+			    cpu_to_le32(MD_FEATURE_MULTIPLE_PPLS);
+		else
+			sb->feature_map |= cpu_to_le32(MD_FEATURE_PPL);
 		sb->ppl.offset = cpu_to_le16(rdev->ppl.offset);
 		sb->ppl.size = cpu_to_le16(rdev->ppl.size);
 	}
@@ -4285,6 +4295,8 @@ new_dev_store(struct mddev *mddev, const char *buf, size_t len)
 	if (err)
 		export_rdev(rdev);
 	mddev_unlock(mddev);
+	if (!err)
+		md_new_event(mddev);
 	return err ? err : len;
 }
 
@@ -7838,7 +7850,7 @@ static const struct file_operations md_seq_fops = {
 	.open		= md_seq_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
-	.release	= seq_release_private,
+	.release	= seq_release,
 	.poll		= mdstat_poll,
 };
 
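The new_dev_store() hunk makes device additions through the sysfs new_dev attribute raise md_new_event(), so monitors notice spares added to external-metadata containers. A hedged userspace sketch of how such events are consumed — the standard mechanism is polling /proc/mdstat, which md's mdstat_poll() flags with POLLPRI on a new event; the function below is ours, not mdadm's:

#include <poll.h>

/* block until md reports an array state change */
static int wait_for_md_event(int mdstat_fd)
{
	struct pollfd pfd = { .fd = mdstat_fd, .events = POLLIN | POLLPRI };

	return poll(&pfd, 1, -1);
}

A caller would open("/proc/mdstat", O_RDONLY) once and re-read the file after each wakeup.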
diff --git a/drivers/md/md.h b/drivers/md/md.h
index c0d436fb88f0..561d22b9a9a8 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -236,6 +236,7 @@ enum mddev_flags {
 				 * never cause the array to become failed.
 				 */
 	MD_HAS_PPL,		/* The raid array has PPL feature set */
+	MD_HAS_MULTIPLE_PPLS,	/* The raid array has multiple PPLs feature set */
 };
 
 enum mddev_sb_flags {
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 05a4521b832f..5a00fc118470 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -30,7 +30,8 @@
 	((1L << MD_HAS_JOURNAL) |	\
 	 (1L << MD_JOURNAL_CLEAN) |	\
 	 (1L << MD_FAILFAST_SUPPORTED) |\
-	 (1L << MD_HAS_PPL))
+	 (1L << MD_HAS_PPL) |		\
+	 (1L << MD_HAS_MULTIPLE_PPLS))
 
 static int raid0_congested(struct mddev *mddev, int bits)
 {
@@ -539,6 +540,7 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
 			    !discard_bio)
 				continue;
 			bio_chain(discard_bio, bio);
+			bio_clone_blkcg_association(discard_bio, bio);
 			if (mddev->gendisk)
 				trace_block_bio_remap(bdev_get_queue(rdev->bdev),
 					discard_bio, disk_devt(mddev->gendisk),
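This is the consumer of the newly exported bio_clone_blkcg_association() from the block/bio.c hunk above: without it, a discard bio split off in raid0_handle_discard() is accounted to the root cgroup instead of the submitter's. The general shape of the pattern, as a sketch (split_sectors and bs stand in for values the caller already has):

struct bio *discard = bio_split(bio, split_sectors, GFP_NOIO, bs);

bio_chain(discard, bio);			/* complete the parent only after the split part */
bio_clone_blkcg_association(discard, bio);	/* charge the IO to the submitting cgroup */
generic_make_request(discard);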
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index baf5e358d22a..f3f3e40dc9d8 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -48,7 +48,8 @@
 #define UNSUPPORTED_MDDEV_FLAGS		\
 	((1L << MD_HAS_JOURNAL) |	\
 	 (1L << MD_JOURNAL_CLEAN) |	\
-	 (1L << MD_HAS_PPL))
+	 (1L << MD_HAS_PPL) |		\
+	 (1L << MD_HAS_MULTIPLE_PPLS))
 
 /*
  * Number of guaranteed r1bios in case of extreme VM load:
@@ -2560,6 +2561,23 @@ static int init_resync(struct r1conf *conf)
 	return 0;
 }
 
+static struct r1bio *raid1_alloc_init_r1buf(struct r1conf *conf)
+{
+	struct r1bio *r1bio = mempool_alloc(conf->r1buf_pool, GFP_NOIO);
+	struct resync_pages *rps;
+	struct bio *bio;
+	int i;
+
+	for (i = conf->poolinfo->raid_disks; i--; ) {
+		bio = r1bio->bios[i];
+		rps = bio->bi_private;
+		bio_reset(bio);
+		bio->bi_private = rps;
+	}
+	r1bio->master_bio = NULL;
+	return r1bio;
+}
+
 /*
  * perform a "sync" on one "block"
  *
@@ -2645,7 +2663,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
 
 	bitmap_cond_end_sync(mddev->bitmap, sector_nr,
 		mddev_is_clustered(mddev) && (sector_nr + 2 * RESYNC_SECTORS > conf->cluster_sync_high));
-	r1_bio = mempool_alloc(conf->r1buf_pool, GFP_NOIO);
+	r1_bio = raid1_alloc_init_r1buf(conf);
 
 	raise_barrier(conf, sector_nr);
 
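The regression fix here (mirrored for raid10 below) concerns bios recycled through a mempool: a freed-and-reallocated bio still carries the previous user's flags and completion state, so it must be bio_reset() before reuse — but bio_reset() also zeroes bi_private, which md points at the bio's resync_pages, so that one field is saved and restored around the reset. In miniature:

/* sketch: recycle a pooled bio without losing the one field md owns */
void *rps = bio->bi_private;	/* resync_pages pointer installed at allocation */

bio_reset(bio);			/* clears flags, status, iterator -- and bi_private */
bio->bi_private = rps;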
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index d1f948e371e0..374df5796649 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -2796,6 +2796,35 @@ static int init_resync(struct r10conf *conf)
 	return 0;
 }
 
+static struct r10bio *raid10_alloc_init_r10buf(struct r10conf *conf)
+{
+	struct r10bio *r10bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO);
+	struct rsync_pages *rp;
+	struct bio *bio;
+	int nalloc;
+	int i;
+
+	if (test_bit(MD_RECOVERY_SYNC, &conf->mddev->recovery) ||
+	    test_bit(MD_RECOVERY_RESHAPE, &conf->mddev->recovery))
+		nalloc = conf->copies; /* resync */
+	else
+		nalloc = 2; /* recovery */
+
+	for (i = 0; i < nalloc; i++) {
+		bio = r10bio->devs[i].bio;
+		rp = bio->bi_private;
+		bio_reset(bio);
+		bio->bi_private = rp;
+		bio = r10bio->devs[i].repl_bio;
+		if (bio) {
+			rp = bio->bi_private;
+			bio_reset(bio);
+			bio->bi_private = rp;
+		}
+	}
+	return r10bio;
+}
+
 /*
  * perform a "sync" on one "block"
  *
@@ -3025,7 +3054,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 				atomic_inc(&mreplace->nr_pending);
 			rcu_read_unlock();
 
-			r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO);
+			r10_bio = raid10_alloc_init_r10buf(conf);
 			r10_bio->state = 0;
 			raise_barrier(conf, rb2 != NULL);
 			atomic_set(&r10_bio->remaining, 0);
@@ -3234,7 +3263,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 		}
 		if (sync_blocks < max_sync)
 			max_sync = sync_blocks;
-		r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO);
+		r10_bio = raid10_alloc_init_r10buf(conf);
 		r10_bio->state = 0;
 
 		r10_bio->mddev = mddev;
@@ -4358,7 +4387,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
 
 read_more:
 	/* Now schedule reads for blocks from sector_nr to last */
-	r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO);
+	r10_bio = raid10_alloc_init_r10buf(conf);
 	r10_bio->state = 0;
 	raise_barrier(conf, sectors_done != 0);
 	atomic_set(&r10_bio->remaining, 0);
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 5d7da1c700aa..0b7406ac8ce1 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -2529,11 +2529,18 @@ static void r5l_write_super(struct r5l_log *log, sector_t cp)
 
 static ssize_t r5c_journal_mode_show(struct mddev *mddev, char *page)
 {
-	struct r5conf *conf = mddev->private;
+	struct r5conf *conf;
 	int ret;
 
-	if (!conf->log)
+	ret = mddev_lock(mddev);
+	if (ret)
+		return ret;
+
+	conf = mddev->private;
+	if (!conf || !conf->log) {
+		mddev_unlock(mddev);
 		return 0;
+	}
 
 	switch (conf->log->r5c_journal_mode) {
 	case R5C_JOURNAL_MODE_WRITE_THROUGH:
@@ -2551,6 +2558,7 @@ static ssize_t r5c_journal_mode_show(struct mddev *mddev, char *page)
 	default:
 		ret = 0;
 	}
+	mddev_unlock(mddev);
 	return ret;
 }
 
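The r5c_journal_mode_show() fix applies the usual md sysfs discipline: take mddev_lock() before touching mddev->private, since a concurrent reconfiguration can tear down the r5conf under a reader, and propagate the lock's return value because mddev_lock() is interruptible. As a generic sketch of the pattern (the attribute body is a placeholder):

static ssize_t example_show(struct mddev *mddev, char *page)
{
	int ret = mddev_lock(mddev);	/* interruptible; may return -EINTR */

	if (ret)
		return ret;
	/* ... safely read mddev->private and format into page ... */
	mddev_unlock(mddev);
	return ret;
}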
diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index 1e237c40d6fa..cd026c88f7ef 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -87,6 +87,8 @@
  * The current io_unit accepting new stripes is always at the end of the list.
  */
 
+#define PPL_SPACE_SIZE (128 * 1024)
+
 struct ppl_conf {
 	struct mddev *mddev;
 
@@ -122,6 +124,10 @@ struct ppl_log {
 				 * always at the end of io_list */
 	spinlock_t io_list_lock;
 	struct list_head io_list;	/* all io_units of this log */
+
+	sector_t next_io_sector;
+	unsigned int entry_space;
+	bool use_multippl;
 };
 
 #define PPL_IO_INLINE_BVECS 32
@@ -264,13 +270,12 @@ static int ppl_log_stripe(struct ppl_log *log, struct stripe_head *sh)
 	int i;
 	sector_t data_sector = 0;
 	int data_disks = 0;
-	unsigned int entry_space = (log->rdev->ppl.size << 9) - PPL_HEADER_SIZE;
 	struct r5conf *conf = sh->raid_conf;
 
 	pr_debug("%s: stripe: %llu\n", __func__, (unsigned long long)sh->sector);
 
 	/* check if current io_unit is full */
-	if (io && (io->pp_size == entry_space ||
+	if (io && (io->pp_size == log->entry_space ||
 		   io->entries_count == PPL_HDR_MAX_ENTRIES)) {
 		pr_debug("%s: add io_unit blocked by seq: %llu\n",
 			 __func__, io->seq);
@@ -451,12 +456,25 @@ static void ppl_submit_iounit(struct ppl_io_unit *io)
 	pplhdr->entries_count = cpu_to_le32(io->entries_count);
 	pplhdr->checksum = cpu_to_le32(~crc32c_le(~0, pplhdr, PPL_HEADER_SIZE));
 
+	/* Rewind the buffer if current PPL is larger than remaining space */
+	if (log->use_multippl &&
+	    log->rdev->ppl.sector + log->rdev->ppl.size - log->next_io_sector <
+	    (PPL_HEADER_SIZE + io->pp_size) >> 9)
+		log->next_io_sector = log->rdev->ppl.sector;
+
 	bio->bi_end_io = ppl_log_endio;
 	bio->bi_opf = REQ_OP_WRITE | REQ_FUA;
 	bio_set_dev(bio, log->rdev->bdev);
-	bio->bi_iter.bi_sector = log->rdev->ppl.sector;
+	bio->bi_iter.bi_sector = log->next_io_sector;
 	bio_add_page(bio, io->header_page, PAGE_SIZE, 0);
 
+	pr_debug("%s: log->current_io_sector: %llu\n", __func__,
+	    (unsigned long long)log->next_io_sector);
+
+	if (log->use_multippl)
+		log->next_io_sector += (PPL_HEADER_SIZE + io->pp_size) >> 9;
+
 	list_for_each_entry(sh, &io->stripe_list, log_list) {
 		/* entries for full stripe writes have no partial parity */
 		if (test_bit(STRIPE_FULL_WRITE, &sh->state))
@@ -813,12 +831,14 @@ out:
 	return ret;
 }
 
-static int ppl_recover(struct ppl_log *log, struct ppl_header *pplhdr)
+static int ppl_recover(struct ppl_log *log, struct ppl_header *pplhdr,
+		       sector_t offset)
 {
 	struct ppl_conf *ppl_conf = log->ppl_conf;
 	struct md_rdev *rdev = log->rdev;
 	struct mddev *mddev = rdev->mddev;
-	sector_t ppl_sector = rdev->ppl.sector + (PPL_HEADER_SIZE >> 9);
+	sector_t ppl_sector = rdev->ppl.sector + offset +
+			      (PPL_HEADER_SIZE >> 9);
 	struct page *page;
 	int i;
 	int ret = 0;
@@ -902,6 +922,9 @@ static int ppl_write_empty_header(struct ppl_log *log)
 		return -ENOMEM;
 
 	pplhdr = page_address(page);
+	/* zero out PPL space to avoid collision with old PPLs */
+	blkdev_issue_zeroout(rdev->bdev, rdev->ppl.sector,
+			     log->rdev->ppl.size, GFP_NOIO, 0);
 	memset(pplhdr->reserved, 0xff, PPL_HDR_RESERVED);
 	pplhdr->signature = cpu_to_le32(log->ppl_conf->signature);
 	pplhdr->checksum = cpu_to_le32(~crc32c_le(~0, pplhdr, PAGE_SIZE));
@@ -922,63 +945,110 @@ static int ppl_load_distributed(struct ppl_log *log)
 	struct ppl_conf *ppl_conf = log->ppl_conf;
 	struct md_rdev *rdev = log->rdev;
 	struct mddev *mddev = rdev->mddev;
-	struct page *page;
-	struct ppl_header *pplhdr;
+	struct page *page, *page2, *tmp;
+	struct ppl_header *pplhdr = NULL, *prev_pplhdr = NULL;
 	u32 crc, crc_stored;
 	u32 signature;
-	int ret = 0;
+	int ret = 0, i;
+	sector_t pplhdr_offset = 0, prev_pplhdr_offset = 0;
 
 	pr_debug("%s: disk: %d\n", __func__, rdev->raid_disk);
-
-	/* read PPL header */
+	/* read PPL headers, find the recent one */
 	page = alloc_page(GFP_KERNEL);
 	if (!page)
 		return -ENOMEM;
 
-	if (!sync_page_io(rdev, rdev->ppl.sector - rdev->data_offset,
-			  PAGE_SIZE, page, REQ_OP_READ, 0, false)) {
-		md_error(mddev, rdev);
-		ret = -EIO;
-		goto out;
+	page2 = alloc_page(GFP_KERNEL);
+	if (!page2) {
+		__free_page(page);
+		return -ENOMEM;
 	}
-	pplhdr = page_address(page);
-
-	/* check header validity */
-	crc_stored = le32_to_cpu(pplhdr->checksum);
-	pplhdr->checksum = 0;
-	crc = ~crc32c_le(~0, pplhdr, PAGE_SIZE);
 
-	if (crc_stored != crc) {
-		pr_debug("%s: ppl header crc does not match: stored: 0x%x calculated: 0x%x\n",
-			 __func__, crc_stored, crc);
-		ppl_conf->mismatch_count++;
-		goto out;
-	}
+	/* searching ppl area for latest ppl */
+	while (pplhdr_offset < rdev->ppl.size - (PPL_HEADER_SIZE >> 9)) {
+		if (!sync_page_io(rdev,
+				  rdev->ppl.sector - rdev->data_offset +
+				  pplhdr_offset, PAGE_SIZE, page, REQ_OP_READ,
+				  0, false)) {
+			md_error(mddev, rdev);
+			ret = -EIO;
+			/* if not able to read - don't recover any PPL */
+			pplhdr = NULL;
+			break;
+		}
+		pplhdr = page_address(page);
+
+		/* check header validity */
+		crc_stored = le32_to_cpu(pplhdr->checksum);
+		pplhdr->checksum = 0;
+		crc = ~crc32c_le(~0, pplhdr, PAGE_SIZE);
+
+		if (crc_stored != crc) {
+			pr_debug("%s: ppl header crc does not match: stored: 0x%x calculated: 0x%x (offset: %llu)\n",
+				 __func__, crc_stored, crc,
+				 (unsigned long long)pplhdr_offset);
+			pplhdr = prev_pplhdr;
+			pplhdr_offset = prev_pplhdr_offset;
+			break;
+		}
 
-	signature = le32_to_cpu(pplhdr->signature);
+		signature = le32_to_cpu(pplhdr->signature);
 
-	if (mddev->external) {
-		/*
-		 * For external metadata the header signature is set and
-		 * validated in userspace.
-		 */
-		ppl_conf->signature = signature;
-	} else if (ppl_conf->signature != signature) {
-		pr_debug("%s: ppl header signature does not match: stored: 0x%x configured: 0x%x\n",
-			 __func__, signature, ppl_conf->signature);
-		ppl_conf->mismatch_count++;
-		goto out;
+		if (mddev->external) {
+			/*
+			 * For external metadata the header signature is set and
+			 * validated in userspace.
+			 */
+			ppl_conf->signature = signature;
+		} else if (ppl_conf->signature != signature) {
+			pr_debug("%s: ppl header signature does not match: stored: 0x%x configured: 0x%x (offset: %llu)\n",
+				 __func__, signature, ppl_conf->signature,
+				 (unsigned long long)pplhdr_offset);
+			pplhdr = prev_pplhdr;
+			pplhdr_offset = prev_pplhdr_offset;
+			break;
+		}
+
+		if (prev_pplhdr && le64_to_cpu(prev_pplhdr->generation) >
+		    le64_to_cpu(pplhdr->generation)) {
+			/* previous was newest */
+			pplhdr = prev_pplhdr;
+			pplhdr_offset = prev_pplhdr_offset;
+			break;
+		}
+
+		prev_pplhdr_offset = pplhdr_offset;
+		prev_pplhdr = pplhdr;
+
+		tmp = page;
+		page = page2;
+		page2 = tmp;
+
+		/* calculate next potential ppl offset */
+		for (i = 0; i < le32_to_cpu(pplhdr->entries_count); i++)
+			pplhdr_offset +=
+			    le32_to_cpu(pplhdr->entries[i].pp_size) >> 9;
+		pplhdr_offset += PPL_HEADER_SIZE >> 9;
 	}
 
+	/* no valid ppl found */
+	if (!pplhdr)
+		ppl_conf->mismatch_count++;
+	else
+		pr_debug("%s: latest PPL found at offset: %llu, with generation: %llu\n",
+			 __func__, (unsigned long long)pplhdr_offset,
+			 le64_to_cpu(pplhdr->generation));
+
 	/* attempt to recover from log if we are starting a dirty array */
-	if (!mddev->pers && mddev->recovery_cp != MaxSector)
-		ret = ppl_recover(log, pplhdr);
-out:
+	if (pplhdr && !mddev->pers && mddev->recovery_cp != MaxSector)
+		ret = ppl_recover(log, pplhdr, pplhdr_offset);
+
 	/* write empty header if we are starting the array */
 	if (!ret && !mddev->pers)
 		ret = ppl_write_empty_header(log);
 
 	__free_page(page);
+	__free_page(page2);
 
 	pr_debug("%s: return: %d mismatch_count: %d recovered_entries: %d\n",
 		 __func__, ret, ppl_conf->mismatch_count,
@@ -1031,6 +1101,7 @@ static int ppl_load(struct ppl_conf *ppl_conf)
 static void __ppl_exit_log(struct ppl_conf *ppl_conf)
 {
 	clear_bit(MD_HAS_PPL, &ppl_conf->mddev->flags);
+	clear_bit(MD_HAS_MULTIPLE_PPLS, &ppl_conf->mddev->flags);
 
 	kfree(ppl_conf->child_logs);
 
@@ -1099,6 +1170,22 @@ static int ppl_validate_rdev(struct md_rdev *rdev)
 	return 0;
 }
 
+static void ppl_init_child_log(struct ppl_log *log, struct md_rdev *rdev)
+{
+	if ((rdev->ppl.size << 9) >= (PPL_SPACE_SIZE +
+				      PPL_HEADER_SIZE) * 2) {
+		log->use_multippl = true;
+		set_bit(MD_HAS_MULTIPLE_PPLS,
+			&log->ppl_conf->mddev->flags);
+		log->entry_space = PPL_SPACE_SIZE;
+	} else {
+		log->use_multippl = false;
+		log->entry_space = (log->rdev->ppl.size << 9) -
+				   PPL_HEADER_SIZE;
+	}
+	log->next_io_sector = rdev->ppl.sector;
+}
+
 int ppl_init_log(struct r5conf *conf)
 {
 	struct ppl_conf *ppl_conf;
@@ -1196,6 +1283,7 @@ int ppl_init_log(struct r5conf *conf)
 			q = bdev_get_queue(rdev->bdev);
 			if (test_bit(QUEUE_FLAG_WC, &q->queue_flags))
 				need_cache_flush = true;
+			ppl_init_child_log(log, rdev);
 		}
 	}
 
@@ -1261,6 +1349,7 @@ int ppl_modify_log(struct r5conf *conf, struct md_rdev *rdev, bool add)
 		if (!ret) {
 			log->rdev = rdev;
 			ret = ppl_write_empty_header(log);
+			ppl_init_child_log(log, rdev);
 		}
 	} else {
 		log->rdev = NULL;
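To make the multippl thresholds concrete: assuming PPL_HEADER_SIZE is 4 KiB (its md_p.h value), ppl_init_child_log() requires room for at least two full io_units, 2 * (128 KiB + 4 KiB) = 264 KiB, before enabling multippl mode. A standalone sketch of the arithmetic for two PPL area sizes:

#include <stdbool.h>
#include <stdio.h>

#define PPL_SPACE_SIZE	(128 * 1024)
#define PPL_HEADER_SIZE	4096		/* assumed, per md_p.h */

int main(void)
{
	unsigned long long sizes[] = { 2048, 512 };	/* PPL areas in 512B sectors */

	for (int i = 0; i < 2; i++) {
		unsigned long long bytes = sizes[i] << 9;
		bool multi = bytes >= (PPL_SPACE_SIZE + PPL_HEADER_SIZE) * 2ULL;

		printf("%llu sectors: multippl=%d entry_space=%llu\n",
		       sizes[i], multi,
		       multi ? (unsigned long long)PPL_SPACE_SIZE
			     : bytes - PPL_HEADER_SIZE);
	}
	return 0;
}

A 1 MiB area (2048 sectors) runs multippl with entry_space capped at 128 KiB; a 256 KiB area falls back to a single PPL spanning the whole space minus the header.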
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 3ae8bbceb6c4..4188a4881148 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -494,7 +494,6 @@ static int grow_buffers(struct stripe_head *sh, gfp_t gfp)
 	return 0;
 }
 
-static void raid5_build_block(struct stripe_head *sh, int i, int previous);
 static void stripe_set_idx(sector_t stripe, struct r5conf *conf, int previous,
 			   struct stripe_head *sh);
 
@@ -530,7 +529,7 @@ retry:
 			WARN_ON(1);
 		}
 		dev->flags = 0;
-		raid5_build_block(sh, i, previous);
+		dev->sector = raid5_compute_blocknr(sh, i, previous);
 	}
 	if (read_seqcount_retry(&conf->gen_lock, seq))
 		goto retry;
@@ -2662,14 +2661,6 @@ static void raid5_end_write_request(struct bio *bi)
 		raid5_release_stripe(sh->batch_head);
 }
 
-static void raid5_build_block(struct stripe_head *sh, int i, int previous)
-{
-	struct r5dev *dev = &sh->dev[i];
-
-	dev->flags = 0;
-	dev->sector = raid5_compute_blocknr(sh, i, previous);
-}
-
 static void raid5_error(struct mddev *mddev, struct md_rdev *rdev)
 {
 	char b[BDEVNAME_SIZE];
@@ -6237,6 +6228,10 @@ static void raid5_do_work(struct work_struct *work)
 
 	spin_unlock_irq(&conf->device_lock);
 
+	flush_deferred_bios(conf);
+
+	r5l_flush_stripe_to_raid(conf->log);
+
 	async_tx_issue_pending_all();
 	blk_finish_plug(&plug);
 
@@ -7243,6 +7238,7 @@ static int raid5_run(struct mddev *mddev)
 		pr_warn("md/raid:%s: using journal device and PPL not allowed - disabling PPL\n",
 			mdname(mddev));
 		clear_bit(MD_HAS_PPL, &mddev->flags);
+		clear_bit(MD_HAS_MULTIPLE_PPLS, &mddev->flags);
 	}
 
 	if (mddev->private == NULL)
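The raid5_do_work() hunk brings the group-worker path in line with the raid5d() main loop: without flush_deferred_bios() and r5l_flush_stripe_to_raid(), IO handled by worker threads could sit deferred until the daemon thread happened to run.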
diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h
index d500bd224979..b9197976b660 100644
--- a/include/uapi/linux/raid/md_p.h
+++ b/include/uapi/linux/raid/md_p.h
@@ -324,9 +324,10 @@ struct mdp_superblock_1 {
 #define MD_FEATURE_RECOVERY_BITMAP	128 /* recovery that is happening
 					     * is guided by bitmap.
 					     */
 #define MD_FEATURE_CLUSTERED		256 /* clustered MD */
 #define MD_FEATURE_JOURNAL		512 /* support write cache */
 #define MD_FEATURE_PPL			1024 /* support PPL */
+#define MD_FEATURE_MULTIPLE_PPLS	2048 /* support for multiple PPLs */
 #define MD_FEATURE_ALL			(MD_FEATURE_BITMAP_OFFSET	\
 					|MD_FEATURE_RECOVERY_OFFSET	\
 					|MD_FEATURE_RESHAPE_ACTIVE	\
@@ -338,6 +339,7 @@ struct mdp_superblock_1 {
 					|MD_FEATURE_CLUSTERED		\
 					|MD_FEATURE_JOURNAL		\
 					|MD_FEATURE_PPL			\
+					|MD_FEATURE_MULTIPLE_PPLS	\
 					)
 
 struct r5l_payload_header {
diff --git a/lib/raid6/avx512.c b/lib/raid6/avx512.c
index f524a7972006..46df7977b971 100644
--- a/lib/raid6/avx512.c
+++ b/lib/raid6/avx512.c
@@ -29,7 +29,7 @@
 
 static const struct raid6_avx512_constants {
 	u64 x1d[8];
-} raid6_avx512_constants __aligned(512) = {
+} raid6_avx512_constants __aligned(512/8) = {
 	{ 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
 	  0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
 	  0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
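The avx512.c change is a units fix: __aligned() takes bytes, so __aligned(512) over-aligned the table to 512 bytes, where the intent was 512 bits — 64 bytes, the natural alignment for ZMM loads. The corrected expression in one line:

#include <assert.h>

static_assert(512 / 8 == 64, "512 bits is 64 bytes, the full ZMM register width");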