summary | refs | log | tree | commit | diff | stats
path: root/drivers/md/raid5-ppl.c
diff options
context:
space:
mode:
author: Tomasz Majchrzak <tomasz.majchrzak@intel.com> 2017-12-27 04:31:40 -0500
committer: Shaohua Li <sh.li@alibaba-inc.com> 2018-01-15 17:29:42 -0500
commit: 1532d9e87e8b2377f12929f9e40724d5fbe6ecc5 (patch)
tree: fa8ec94368dfff1ec93b0366833e6f5d3cbcc70c /drivers/md/raid5-ppl.c
parent: 92e6245deab80f0934a102ba969d8b891b8ba5bf (diff)
raid5-ppl: PPL support for disks with write-back cache enabled
In order to provide data consistency with PPL for disks with write-back cache enabled, all data has to be flushed to disks before the next PPL entry. The disks to be flushed are marked in the bitmap. It's modified under a mutex and it's only read after the PPL io unit is submitted.

A limitation of 64 disks in the array has been introduced to keep data structures and implementation simple. RAID5 arrays with so many disks are not likely due to the high risk of multiple disk failures. Such a restriction should not be a real-life limitation.

With write-back cache disabled, the next PPL entry is submitted when the data write for the current one completes. Data flush defers the next log submission, so trigger it when there are no stripes found for handling.

As PPL assures all data is flushed to disk at request completion, just acknowledge the flush request when PPL is enabled.

Signed-off-by: Tomasz Majchrzak <tomasz.majchrzak@intel.com>
Signed-off-by: Shaohua Li <sh.li@alibaba-inc.com>
Diffstat (limited to 'drivers/md/raid5-ppl.c')
-rw-r--r--  drivers/md/raid5-ppl.c  167
1 files changed, 155 insertions, 12 deletions
diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index 628c0bf7b9fd..2764c2290062 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -85,6 +85,9 @@
85 * (for a single member disk). New io_units are added to the end of the list 85 * (for a single member disk). New io_units are added to the end of the list
86 * and the first io_unit is submitted, if it is not submitted already. 86 * and the first io_unit is submitted, if it is not submitted already.
87 * The current io_unit accepting new stripes is always at the end of the list. 87 * The current io_unit accepting new stripes is always at the end of the list.
88 *
89 * If write-back cache is enabled for any of the disks in the array, its data
90 * must be flushed before next io_unit is submitted.
88 */ 91 */
89 92
90#define PPL_SPACE_SIZE (128 * 1024) 93#define PPL_SPACE_SIZE (128 * 1024)
@@ -104,6 +107,7 @@ struct ppl_conf {
104 struct kmem_cache *io_kc; 107 struct kmem_cache *io_kc;
105 mempool_t *io_pool; 108 mempool_t *io_pool;
106 struct bio_set *bs; 109 struct bio_set *bs;
110 struct bio_set *flush_bs;
107 111
108 /* used only for recovery */ 112 /* used only for recovery */
109 int recovered_entries; 113 int recovered_entries;
@@ -128,6 +132,8 @@ struct ppl_log {
128 sector_t next_io_sector; 132 sector_t next_io_sector;
129 unsigned int entry_space; 133 unsigned int entry_space;
130 bool use_multippl; 134 bool use_multippl;
135 bool wb_cache_on;
136 unsigned long disk_flush_bitmap;
131}; 137};
132 138
133#define PPL_IO_INLINE_BVECS 32 139#define PPL_IO_INLINE_BVECS 32
@@ -145,6 +151,7 @@ struct ppl_io_unit {
145 151
146 struct list_head stripe_list; /* stripes added to the io_unit */ 152 struct list_head stripe_list; /* stripes added to the io_unit */
147 atomic_t pending_stripes; /* how many stripes not written to raid */ 153 atomic_t pending_stripes; /* how many stripes not written to raid */
154 atomic_t pending_flushes; /* how many disk flushes are in progress */
148 155
149 bool submitted; /* true if write to log started */ 156 bool submitted; /* true if write to log started */
150 157
@@ -249,6 +256,7 @@ static struct ppl_io_unit *ppl_new_iounit(struct ppl_log *log,
249 INIT_LIST_HEAD(&io->log_sibling); 256 INIT_LIST_HEAD(&io->log_sibling);
250 INIT_LIST_HEAD(&io->stripe_list); 257 INIT_LIST_HEAD(&io->stripe_list);
251 atomic_set(&io->pending_stripes, 0); 258 atomic_set(&io->pending_stripes, 0);
259 atomic_set(&io->pending_flushes, 0);
252 bio_init(&io->bio, io->biovec, PPL_IO_INLINE_BVECS); 260 bio_init(&io->bio, io->biovec, PPL_IO_INLINE_BVECS);
253 261
254 pplhdr = page_address(io->header_page); 262 pplhdr = page_address(io->header_page);
@@ -475,7 +483,18 @@ static void ppl_submit_iounit(struct ppl_io_unit *io)
475 if (log->use_multippl) 483 if (log->use_multippl)
476 log->next_io_sector += (PPL_HEADER_SIZE + io->pp_size) >> 9; 484 log->next_io_sector += (PPL_HEADER_SIZE + io->pp_size) >> 9;
477 485
486 WARN_ON(log->disk_flush_bitmap != 0);
487
478 list_for_each_entry(sh, &io->stripe_list, log_list) { 488 list_for_each_entry(sh, &io->stripe_list, log_list) {
489 for (i = 0; i < sh->disks; i++) {
490 struct r5dev *dev = &sh->dev[i];
491
492 if ((ppl_conf->child_logs[i].wb_cache_on) &&
493 (test_bit(R5_Wantwrite, &dev->flags))) {
494 set_bit(i, &log->disk_flush_bitmap);
495 }
496 }
497
479 /* entries for full stripe writes have no partial parity */ 498 /* entries for full stripe writes have no partial parity */
480 if (test_bit(STRIPE_FULL_WRITE, &sh->state)) 499 if (test_bit(STRIPE_FULL_WRITE, &sh->state))
481 continue; 500 continue;
@@ -540,6 +559,7 @@ static void ppl_io_unit_finished(struct ppl_io_unit *io)
540{ 559{
541 struct ppl_log *log = io->log; 560 struct ppl_log *log = io->log;
542 struct ppl_conf *ppl_conf = log->ppl_conf; 561 struct ppl_conf *ppl_conf = log->ppl_conf;
562 struct r5conf *conf = ppl_conf->mddev->private;
543 unsigned long flags; 563 unsigned long flags;
544 564
545 pr_debug("%s: seq: %llu\n", __func__, io->seq); 565 pr_debug("%s: seq: %llu\n", __func__, io->seq);
@@ -565,6 +585,112 @@ static void ppl_io_unit_finished(struct ppl_io_unit *io)
565 spin_unlock(&ppl_conf->no_mem_stripes_lock); 585 spin_unlock(&ppl_conf->no_mem_stripes_lock);
566 586
567 local_irq_restore(flags); 587 local_irq_restore(flags);
588
589 wake_up(&conf->wait_for_quiescent);
590}
591
592static void ppl_flush_endio(struct bio *bio)
593{
594 struct ppl_io_unit *io = bio->bi_private;
595 struct ppl_log *log = io->log;
596 struct ppl_conf *ppl_conf = log->ppl_conf;
597 struct r5conf *conf = ppl_conf->mddev->private;
598 char b[BDEVNAME_SIZE];
599
600 pr_debug("%s: dev: %s\n", __func__, bio_devname(bio, b));
601
602 if (bio->bi_status) {
603 struct md_rdev *rdev;
604
605 rcu_read_lock();
606 rdev = md_find_rdev_rcu(conf->mddev, bio_dev(bio));
607 if (rdev)
608 md_error(rdev->mddev, rdev);
609 rcu_read_unlock();
610 }
611
612 bio_put(bio);
613
614 if (atomic_dec_and_test(&io->pending_flushes)) {
615 ppl_io_unit_finished(io);
616 md_wakeup_thread(conf->mddev->thread);
617 }
618}
619
620static void ppl_do_flush(struct ppl_io_unit *io)
621{
622 struct ppl_log *log = io->log;
623 struct ppl_conf *ppl_conf = log->ppl_conf;
624 struct r5conf *conf = ppl_conf->mddev->private;
625 int raid_disks = conf->raid_disks;
626 int flushed_disks = 0;
627 int i;
628
629 atomic_set(&io->pending_flushes, raid_disks);
630
631 for_each_set_bit(i, &log->disk_flush_bitmap, raid_disks) {
632 struct md_rdev *rdev;
633 struct block_device *bdev = NULL;
634
635 rcu_read_lock();
636 rdev = rcu_dereference(conf->disks[i].rdev);
637 if (rdev && !test_bit(Faulty, &rdev->flags))
638 bdev = rdev->bdev;
639 rcu_read_unlock();
640
641 if (bdev) {
642 struct bio *bio;
643 char b[BDEVNAME_SIZE];
644
645 bio = bio_alloc_bioset(GFP_NOIO, 0, ppl_conf->flush_bs);
646 bio_set_dev(bio, bdev);
647 bio->bi_private = io;
648 bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
649 bio->bi_end_io = ppl_flush_endio;
650
651 pr_debug("%s: dev: %s\n", __func__,
652 bio_devname(bio, b));
653
654 submit_bio(bio);
655 flushed_disks++;
656 }
657 }
658
659 log->disk_flush_bitmap = 0;
660
661 for (i = flushed_disks ; i < raid_disks; i++) {
662 if (atomic_dec_and_test(&io->pending_flushes))
663 ppl_io_unit_finished(io);
664 }
665}
666
667static inline bool ppl_no_io_unit_submitted(struct r5conf *conf,
668 struct ppl_log *log)
669{
670 struct ppl_io_unit *io;
671
672 io = list_first_entry_or_null(&log->io_list, struct ppl_io_unit,
673 log_sibling);
674
675 return !io || !io->submitted;
676}
677
678void ppl_quiesce(struct r5conf *conf, int quiesce)
679{
680 struct ppl_conf *ppl_conf = conf->log_private;
681 int i;
682
683 if (quiesce) {
684 for (i = 0; i < ppl_conf->count; i++) {
685 struct ppl_log *log = &ppl_conf->child_logs[i];
686
687 spin_lock_irq(&log->io_list_lock);
688 wait_event_lock_irq(conf->wait_for_quiescent,
689 ppl_no_io_unit_submitted(conf, log),
690 log->io_list_lock);
691 spin_unlock_irq(&log->io_list_lock);
692 }
693 }
568} 694}
569 695
570void ppl_stripe_write_finished(struct stripe_head *sh) 696void ppl_stripe_write_finished(struct stripe_head *sh)
@@ -574,8 +700,12 @@ void ppl_stripe_write_finished(struct stripe_head *sh)
574 io = sh->ppl_io; 700 io = sh->ppl_io;
575 sh->ppl_io = NULL; 701 sh->ppl_io = NULL;
576 702
577 if (io && atomic_dec_and_test(&io->pending_stripes)) 703 if (io && atomic_dec_and_test(&io->pending_stripes)) {
578 ppl_io_unit_finished(io); 704 if (io->log->disk_flush_bitmap)
705 ppl_do_flush(io);
706 else
707 ppl_io_unit_finished(io);
708 }
579} 709}
580 710
581static void ppl_xor(int size, struct page *page1, struct page *page2) 711static void ppl_xor(int size, struct page *page1, struct page *page2)
@@ -1108,6 +1238,8 @@ static void __ppl_exit_log(struct ppl_conf *ppl_conf)
1108 1238
1109 if (ppl_conf->bs) 1239 if (ppl_conf->bs)
1110 bioset_free(ppl_conf->bs); 1240 bioset_free(ppl_conf->bs);
1241 if (ppl_conf->flush_bs)
1242 bioset_free(ppl_conf->flush_bs);
1111 mempool_destroy(ppl_conf->io_pool); 1243 mempool_destroy(ppl_conf->io_pool);
1112 kmem_cache_destroy(ppl_conf->io_kc); 1244 kmem_cache_destroy(ppl_conf->io_kc);
1113 1245
@@ -1173,6 +1305,8 @@ static int ppl_validate_rdev(struct md_rdev *rdev)
1173 1305
1174static void ppl_init_child_log(struct ppl_log *log, struct md_rdev *rdev) 1306static void ppl_init_child_log(struct ppl_log *log, struct md_rdev *rdev)
1175{ 1307{
1308 struct request_queue *q;
1309
1176 if ((rdev->ppl.size << 9) >= (PPL_SPACE_SIZE + 1310 if ((rdev->ppl.size << 9) >= (PPL_SPACE_SIZE +
1177 PPL_HEADER_SIZE) * 2) { 1311 PPL_HEADER_SIZE) * 2) {
1178 log->use_multippl = true; 1312 log->use_multippl = true;
@@ -1185,6 +1319,10 @@ static void ppl_init_child_log(struct ppl_log *log, struct md_rdev *rdev)
1185 PPL_HEADER_SIZE; 1319 PPL_HEADER_SIZE;
1186 } 1320 }
1187 log->next_io_sector = rdev->ppl.sector; 1321 log->next_io_sector = rdev->ppl.sector;
1322
1323 q = bdev_get_queue(rdev->bdev);
1324 if (test_bit(QUEUE_FLAG_WC, &q->queue_flags))
1325 log->wb_cache_on = true;
1188} 1326}
1189 1327
1190int ppl_init_log(struct r5conf *conf) 1328int ppl_init_log(struct r5conf *conf)
@@ -1192,8 +1330,8 @@ int ppl_init_log(struct r5conf *conf)
1192 struct ppl_conf *ppl_conf; 1330 struct ppl_conf *ppl_conf;
1193 struct mddev *mddev = conf->mddev; 1331 struct mddev *mddev = conf->mddev;
1194 int ret = 0; 1332 int ret = 0;
1333 int max_disks;
1195 int i; 1334 int i;
1196 bool need_cache_flush = false;
1197 1335
1198 pr_debug("md/raid:%s: enabling distributed Partial Parity Log\n", 1336 pr_debug("md/raid:%s: enabling distributed Partial Parity Log\n",
1199 mdname(conf->mddev)); 1337 mdname(conf->mddev));
@@ -1219,6 +1357,14 @@ int ppl_init_log(struct r5conf *conf)
1219 return -EINVAL; 1357 return -EINVAL;
1220 } 1358 }
1221 1359
1360 max_disks = FIELD_SIZEOF(struct ppl_log, disk_flush_bitmap) *
1361 BITS_PER_BYTE;
1362 if (conf->raid_disks > max_disks) {
1363 pr_warn("md/raid:%s PPL doesn't support over %d disks in the array\n",
1364 mdname(mddev), max_disks);
1365 return -EINVAL;
1366 }
1367
1222 ppl_conf = kzalloc(sizeof(struct ppl_conf), GFP_KERNEL); 1368 ppl_conf = kzalloc(sizeof(struct ppl_conf), GFP_KERNEL);
1223 if (!ppl_conf) 1369 if (!ppl_conf)
1224 return -ENOMEM; 1370 return -ENOMEM;
@@ -1244,6 +1390,12 @@ int ppl_init_log(struct r5conf *conf)
1244 goto err; 1390 goto err;
1245 } 1391 }
1246 1392
1393 ppl_conf->flush_bs = bioset_create(conf->raid_disks, 0, 0);
1394 if (!ppl_conf->flush_bs) {
1395 ret = -ENOMEM;
1396 goto err;
1397 }
1398
1247 ppl_conf->count = conf->raid_disks; 1399 ppl_conf->count = conf->raid_disks;
1248 ppl_conf->child_logs = kcalloc(ppl_conf->count, sizeof(struct ppl_log), 1400 ppl_conf->child_logs = kcalloc(ppl_conf->count, sizeof(struct ppl_log),
1249 GFP_KERNEL); 1401 GFP_KERNEL);
@@ -1275,23 +1427,14 @@ int ppl_init_log(struct r5conf *conf)
1275 log->rdev = rdev; 1427 log->rdev = rdev;
1276 1428
1277 if (rdev) { 1429 if (rdev) {
1278 struct request_queue *q;
1279
1280 ret = ppl_validate_rdev(rdev); 1430 ret = ppl_validate_rdev(rdev);
1281 if (ret) 1431 if (ret)
1282 goto err; 1432 goto err;
1283 1433
1284 q = bdev_get_queue(rdev->bdev);
1285 if (test_bit(QUEUE_FLAG_WC, &q->queue_flags))
1286 need_cache_flush = true;
1287 ppl_init_child_log(log, rdev); 1434 ppl_init_child_log(log, rdev);
1288 } 1435 }
1289 } 1436 }
1290 1437
1291 if (need_cache_flush)
1292 pr_warn("md/raid:%s: Volatile write-back cache should be disabled on all member drives when using PPL!\n",
1293 mdname(mddev));
1294
1295 /* load and possibly recover the logs from the member disks */ 1438 /* load and possibly recover the logs from the member disks */
1296 ret = ppl_load(ppl_conf); 1439 ret = ppl_load(ppl_conf);
1297 1440