aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPawel Baldysiak <pawel.baldysiak@intel.com>2017-08-16 11:13:45 -0400
committerShaohua Li <shli@fb.com>2017-08-28 10:45:48 -0400
commitddc088238cd6988bb4ac3776f403d7ff9d3c7a63 (patch)
tree7f3a97e6af0ae88416519fbfe9af9b9ed08324b0
parent8a8e6f84ad10e7a10bde438b42926da0e9bc820f (diff)
md: Runtime support for multiple ppls
Increase the PPL area to 1MB and use it as a circular buffer to store PPLs. The entry with the highest generation number is the latest one. If the PPL to be written is larger than the space left in the buffer, rewind the buffer to the start (don't wrap it). Signed-off-by: Pawel Baldysiak <pawel.baldysiak@intel.com> Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com> Signed-off-by: Shaohua Li <shli@fb.com>
-rw-r--r--drivers/md/md.c16
-rw-r--r--drivers/md/md.h1
-rw-r--r--drivers/md/raid0.c3
-rw-r--r--drivers/md/raid1.c3
-rw-r--r--drivers/md/raid5-ppl.c43
-rw-r--r--drivers/md/raid5.c1
-rw-r--r--include/uapi/linux/raid/md_p.h4
7 files changed, 62 insertions, 9 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index a74dc9963822..a7876237de10 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1536,7 +1536,8 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
1536 } else if (sb->bblog_offset != 0) 1536 } else if (sb->bblog_offset != 0)
1537 rdev->badblocks.shift = 0; 1537 rdev->badblocks.shift = 0;
1538 1538
1539 if (le32_to_cpu(sb->feature_map) & MD_FEATURE_PPL) { 1539 if ((le32_to_cpu(sb->feature_map) &
1540 (MD_FEATURE_PPL | MD_FEATURE_MULTIPLE_PPLS))) {
1540 rdev->ppl.offset = (__s16)le16_to_cpu(sb->ppl.offset); 1541 rdev->ppl.offset = (__s16)le16_to_cpu(sb->ppl.offset);
1541 rdev->ppl.size = le16_to_cpu(sb->ppl.size); 1542 rdev->ppl.size = le16_to_cpu(sb->ppl.size);
1542 rdev->ppl.sector = rdev->sb_start + rdev->ppl.offset; 1543 rdev->ppl.sector = rdev->sb_start + rdev->ppl.offset;
@@ -1655,10 +1656,15 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
1655 if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL) 1656 if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)
1656 set_bit(MD_HAS_JOURNAL, &mddev->flags); 1657 set_bit(MD_HAS_JOURNAL, &mddev->flags);
1657 1658
1658 if (le32_to_cpu(sb->feature_map) & MD_FEATURE_PPL) { 1659 if (le32_to_cpu(sb->feature_map) &
1660 (MD_FEATURE_PPL | MD_FEATURE_MULTIPLE_PPLS)) {
1659 if (le32_to_cpu(sb->feature_map) & 1661 if (le32_to_cpu(sb->feature_map) &
1660 (MD_FEATURE_BITMAP_OFFSET | MD_FEATURE_JOURNAL)) 1662 (MD_FEATURE_BITMAP_OFFSET | MD_FEATURE_JOURNAL))
1661 return -EINVAL; 1663 return -EINVAL;
1664 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_PPL) &&
1665 (le32_to_cpu(sb->feature_map) &
1666 MD_FEATURE_MULTIPLE_PPLS))
1667 return -EINVAL;
1662 set_bit(MD_HAS_PPL, &mddev->flags); 1668 set_bit(MD_HAS_PPL, &mddev->flags);
1663 } 1669 }
1664 } else if (mddev->pers == NULL) { 1670 } else if (mddev->pers == NULL) {
@@ -1875,7 +1881,11 @@ retry:
1875 sb->feature_map |= cpu_to_le32(MD_FEATURE_JOURNAL); 1881 sb->feature_map |= cpu_to_le32(MD_FEATURE_JOURNAL);
1876 1882
1877 if (test_bit(MD_HAS_PPL, &mddev->flags)) { 1883 if (test_bit(MD_HAS_PPL, &mddev->flags)) {
1878 sb->feature_map |= cpu_to_le32(MD_FEATURE_PPL); 1884 if (test_bit(MD_HAS_MULTIPLE_PPLS, &mddev->flags))
1885 sb->feature_map |=
1886 cpu_to_le32(MD_FEATURE_MULTIPLE_PPLS);
1887 else
1888 sb->feature_map |= cpu_to_le32(MD_FEATURE_PPL);
1879 sb->ppl.offset = cpu_to_le16(rdev->ppl.offset); 1889 sb->ppl.offset = cpu_to_le16(rdev->ppl.offset);
1880 sb->ppl.size = cpu_to_le16(rdev->ppl.size); 1890 sb->ppl.size = cpu_to_le16(rdev->ppl.size);
1881 } 1891 }
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 09db03455801..d4bdfa5c223b 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -236,6 +236,7 @@ enum mddev_flags {
236 * never cause the array to become failed. 236 * never cause the array to become failed.
237 */ 237 */
238 MD_HAS_PPL, /* The raid array has PPL feature set */ 238 MD_HAS_PPL, /* The raid array has PPL feature set */
239 MD_HAS_MULTIPLE_PPLS, /* The raid array has multiple PPLs feature set */
239}; 240};
240 241
241enum mddev_sb_flags { 242enum mddev_sb_flags {
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 6fb81704aff4..fd5e8e5efbef 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -30,7 +30,8 @@
30 ((1L << MD_HAS_JOURNAL) | \ 30 ((1L << MD_HAS_JOURNAL) | \
31 (1L << MD_JOURNAL_CLEAN) | \ 31 (1L << MD_JOURNAL_CLEAN) | \
32 (1L << MD_FAILFAST_SUPPORTED) |\ 32 (1L << MD_FAILFAST_SUPPORTED) |\
33 (1L << MD_HAS_PPL)) 33 (1L << MD_HAS_PPL) | \
34 (1L << MD_HAS_MULTIPLE_PPLS))
34 35
35static int raid0_congested(struct mddev *mddev, int bits) 36static int raid0_congested(struct mddev *mddev, int bits)
36{ 37{
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 79474f47eeef..1f5bd9475dc1 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -48,7 +48,8 @@
48#define UNSUPPORTED_MDDEV_FLAGS \ 48#define UNSUPPORTED_MDDEV_FLAGS \
49 ((1L << MD_HAS_JOURNAL) | \ 49 ((1L << MD_HAS_JOURNAL) | \
50 (1L << MD_JOURNAL_CLEAN) | \ 50 (1L << MD_JOURNAL_CLEAN) | \
51 (1L << MD_HAS_PPL)) 51 (1L << MD_HAS_PPL) | \
52 (1L << MD_HAS_MULTIPLE_PPLS))
52 53
53/* 54/*
54 * Number of guaranteed r1bios in case of extreme VM load: 55 * Number of guaranteed r1bios in case of extreme VM load:
diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index 44ad5baf3206..b313f17a6260 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -87,6 +87,8 @@
87 * The current io_unit accepting new stripes is always at the end of the list. 87 * The current io_unit accepting new stripes is always at the end of the list.
88 */ 88 */
89 89
90#define PPL_SPACE_SIZE (128 * 1024)
91
90struct ppl_conf { 92struct ppl_conf {
91 struct mddev *mddev; 93 struct mddev *mddev;
92 94
@@ -122,6 +124,10 @@ struct ppl_log {
122 * always at the end of io_list */ 124 * always at the end of io_list */
123 spinlock_t io_list_lock; 125 spinlock_t io_list_lock;
124 struct list_head io_list; /* all io_units of this log */ 126 struct list_head io_list; /* all io_units of this log */
127
128 sector_t next_io_sector;
129 unsigned int entry_space;
130 bool use_multippl;
125}; 131};
126 132
127#define PPL_IO_INLINE_BVECS 32 133#define PPL_IO_INLINE_BVECS 32
@@ -264,13 +270,12 @@ static int ppl_log_stripe(struct ppl_log *log, struct stripe_head *sh)
264 int i; 270 int i;
265 sector_t data_sector = 0; 271 sector_t data_sector = 0;
266 int data_disks = 0; 272 int data_disks = 0;
267 unsigned int entry_space = (log->rdev->ppl.size << 9) - PPL_HEADER_SIZE;
268 struct r5conf *conf = sh->raid_conf; 273 struct r5conf *conf = sh->raid_conf;
269 274
270 pr_debug("%s: stripe: %llu\n", __func__, (unsigned long long)sh->sector); 275 pr_debug("%s: stripe: %llu\n", __func__, (unsigned long long)sh->sector);
271 276
272 /* check if current io_unit is full */ 277 /* check if current io_unit is full */
273 if (io && (io->pp_size == entry_space || 278 if (io && (io->pp_size == log->entry_space ||
274 io->entries_count == PPL_HDR_MAX_ENTRIES)) { 279 io->entries_count == PPL_HDR_MAX_ENTRIES)) {
275 pr_debug("%s: add io_unit blocked by seq: %llu\n", 280 pr_debug("%s: add io_unit blocked by seq: %llu\n",
276 __func__, io->seq); 281 __func__, io->seq);
@@ -451,12 +456,25 @@ static void ppl_submit_iounit(struct ppl_io_unit *io)
451 pplhdr->entries_count = cpu_to_le32(io->entries_count); 456 pplhdr->entries_count = cpu_to_le32(io->entries_count);
452 pplhdr->checksum = cpu_to_le32(~crc32c_le(~0, pplhdr, PPL_HEADER_SIZE)); 457 pplhdr->checksum = cpu_to_le32(~crc32c_le(~0, pplhdr, PPL_HEADER_SIZE));
453 458
459 /* Rewind the buffer if current PPL is larger then remaining space */
460 if (log->use_multippl &&
461 log->rdev->ppl.sector + log->rdev->ppl.size - log->next_io_sector <
462 (PPL_HEADER_SIZE + io->pp_size) >> 9)
463 log->next_io_sector = log->rdev->ppl.sector;
464
465
454 bio->bi_end_io = ppl_log_endio; 466 bio->bi_end_io = ppl_log_endio;
455 bio->bi_opf = REQ_OP_WRITE | REQ_FUA; 467 bio->bi_opf = REQ_OP_WRITE | REQ_FUA;
456 bio->bi_bdev = log->rdev->bdev; 468 bio->bi_bdev = log->rdev->bdev;
457 bio->bi_iter.bi_sector = log->rdev->ppl.sector; 469 bio->bi_iter.bi_sector = log->next_io_sector;
458 bio_add_page(bio, io->header_page, PAGE_SIZE, 0); 470 bio_add_page(bio, io->header_page, PAGE_SIZE, 0);
459 471
472 pr_debug("%s: log->current_io_sector: %llu\n", __func__,
473 (unsigned long long)log->next_io_sector);
474
475 if (log->use_multippl)
476 log->next_io_sector += (PPL_HEADER_SIZE + io->pp_size) >> 9;
477
460 list_for_each_entry(sh, &io->stripe_list, log_list) { 478 list_for_each_entry(sh, &io->stripe_list, log_list) {
461 /* entries for full stripe writes have no partial parity */ 479 /* entries for full stripe writes have no partial parity */
462 if (test_bit(STRIPE_FULL_WRITE, &sh->state)) 480 if (test_bit(STRIPE_FULL_WRITE, &sh->state))
@@ -1031,6 +1049,7 @@ static int ppl_load(struct ppl_conf *ppl_conf)
1031static void __ppl_exit_log(struct ppl_conf *ppl_conf) 1049static void __ppl_exit_log(struct ppl_conf *ppl_conf)
1032{ 1050{
1033 clear_bit(MD_HAS_PPL, &ppl_conf->mddev->flags); 1051 clear_bit(MD_HAS_PPL, &ppl_conf->mddev->flags);
1052 clear_bit(MD_HAS_MULTIPLE_PPLS, &ppl_conf->mddev->flags);
1034 1053
1035 kfree(ppl_conf->child_logs); 1054 kfree(ppl_conf->child_logs);
1036 1055
@@ -1099,6 +1118,22 @@ static int ppl_validate_rdev(struct md_rdev *rdev)
1099 return 0; 1118 return 0;
1100} 1119}
1101 1120
1121static void ppl_init_child_log(struct ppl_log *log, struct md_rdev *rdev)
1122{
1123 if ((rdev->ppl.size << 9) >= (PPL_SPACE_SIZE +
1124 PPL_HEADER_SIZE) * 2) {
1125 log->use_multippl = true;
1126 set_bit(MD_HAS_MULTIPLE_PPLS,
1127 &log->ppl_conf->mddev->flags);
1128 log->entry_space = PPL_SPACE_SIZE;
1129 } else {
1130 log->use_multippl = false;
1131 log->entry_space = (log->rdev->ppl.size << 9) -
1132 PPL_HEADER_SIZE;
1133 }
1134 log->next_io_sector = rdev->ppl.sector;
1135}
1136
1102int ppl_init_log(struct r5conf *conf) 1137int ppl_init_log(struct r5conf *conf)
1103{ 1138{
1104 struct ppl_conf *ppl_conf; 1139 struct ppl_conf *ppl_conf;
@@ -1196,6 +1231,7 @@ int ppl_init_log(struct r5conf *conf)
1196 q = bdev_get_queue(rdev->bdev); 1231 q = bdev_get_queue(rdev->bdev);
1197 if (test_bit(QUEUE_FLAG_WC, &q->queue_flags)) 1232 if (test_bit(QUEUE_FLAG_WC, &q->queue_flags))
1198 need_cache_flush = true; 1233 need_cache_flush = true;
1234 ppl_init_child_log(log, rdev);
1199 } 1235 }
1200 } 1236 }
1201 1237
@@ -1261,6 +1297,7 @@ int ppl_modify_log(struct r5conf *conf, struct md_rdev *rdev, bool add)
1261 if (!ret) { 1297 if (!ret) {
1262 log->rdev = rdev; 1298 log->rdev = rdev;
1263 ret = ppl_write_empty_header(log); 1299 ret = ppl_write_empty_header(log);
1300 ppl_init_child_log(log, rdev);
1264 } 1301 }
1265 } else { 1302 } else {
1266 log->rdev = NULL; 1303 log->rdev = NULL;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 6af57c6c0533..049a958d3c1e 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -7236,6 +7236,7 @@ static int raid5_run(struct mddev *mddev)
7236 pr_warn("md/raid:%s: using journal device and PPL not allowed - disabling PPL\n", 7236 pr_warn("md/raid:%s: using journal device and PPL not allowed - disabling PPL\n",
7237 mdname(mddev)); 7237 mdname(mddev));
7238 clear_bit(MD_HAS_PPL, &mddev->flags); 7238 clear_bit(MD_HAS_PPL, &mddev->flags);
7239 clear_bit(MD_HAS_MULTIPLE_PPLS, &mddev->flags);
7239 } 7240 }
7240 7241
7241 if (mddev->private == NULL) 7242 if (mddev->private == NULL)
diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h
index d500bd224979..b9197976b660 100644
--- a/include/uapi/linux/raid/md_p.h
+++ b/include/uapi/linux/raid/md_p.h
@@ -324,9 +324,10 @@ struct mdp_superblock_1 {
324#define MD_FEATURE_RECOVERY_BITMAP 128 /* recovery that is happening 324#define MD_FEATURE_RECOVERY_BITMAP 128 /* recovery that is happening
325 * is guided by bitmap. 325 * is guided by bitmap.
326 */ 326 */
327#define MD_FEATURE_CLUSTERED 256 /* clustered MD */ 327#define MD_FEATURE_CLUSTERED 256 /* clustered MD */
328#define MD_FEATURE_JOURNAL 512 /* support write cache */ 328#define MD_FEATURE_JOURNAL 512 /* support write cache */
329#define MD_FEATURE_PPL 1024 /* support PPL */ 329#define MD_FEATURE_PPL 1024 /* support PPL */
330#define MD_FEATURE_MULTIPLE_PPLS 2048 /* support for multiple PPLs */
330#define MD_FEATURE_ALL (MD_FEATURE_BITMAP_OFFSET \ 331#define MD_FEATURE_ALL (MD_FEATURE_BITMAP_OFFSET \
331 |MD_FEATURE_RECOVERY_OFFSET \ 332 |MD_FEATURE_RECOVERY_OFFSET \
332 |MD_FEATURE_RESHAPE_ACTIVE \ 333 |MD_FEATURE_RESHAPE_ACTIVE \
@@ -338,6 +339,7 @@ struct mdp_superblock_1 {
338 |MD_FEATURE_CLUSTERED \ 339 |MD_FEATURE_CLUSTERED \
339 |MD_FEATURE_JOURNAL \ 340 |MD_FEATURE_JOURNAL \
340 |MD_FEATURE_PPL \ 341 |MD_FEATURE_PPL \
342 |MD_FEATURE_MULTIPLE_PPLS \
341 ) 343 )
342 344
343struct r5l_payload_header { 345struct r5l_payload_header {