author     Xiao Ni <xni@redhat.com>      2018-05-20 23:49:54 -0400
committer  Shaohua Li <shli@fb.com>      2018-05-21 12:30:26 -0400
commit     5a409b4f56d50b212334f338cb8465d65550cd85
tree       05c7ca56936fa8f658a2b08d09b74becd0d36f42
parent     448ec638c6bcf36923db695b6188373b933e7ddc
MD: fix lock contention for flush bios
There is lock contention when many processes send flush bios to an md device,
e.g. when many LVs are created on one raid device and mkfs.xfs is run on each
LV. Currently flush requests can only be handled sequentially: under
mddev->lock, each new flush has to wait for mddev->flush_bio to become NULL
before it can proceed. This patch removes mddev->flush_bio and handles flush
bios asynchronously.

I ran a test with the command dbench -s 128 -t 300. This is the test result:

=================Without the patch============================
 Operation                Count    AvgLat    MaxLat
 --------------------------------------------------
 Flush                    11165   167.595  5879.560
 Close                   107469     1.391  2231.094
 LockX                      384     0.003     0.019
 Rename                    5944     2.141  1856.001
 ReadX                   208121     0.003     0.074
 WriteX                   98259  1925.402 15204.895
 Unlink                   25198    13.264  3457.268
 UnlockX                    384     0.001     0.009
 FIND_FIRST               47111     0.012     0.076
 SET_FILE_INFORMATION     12966     0.007     0.065
 QUERY_FILE_INFORMATION   27921     0.004     0.085
 QUERY_PATH_INFORMATION  124650     0.005     5.766
 QUERY_FS_INFORMATION     22519     0.003     0.053
 NTCreateX               141086     4.291  2502.812

Throughput 3.7181 MB/sec (sync open)  128 clients  128 procs  max_latency=15204.905 ms

=================With the patch============================
 Operation                Count    AvgLat    MaxLat
 --------------------------------------------------
 Flush                     4500   174.134   406.398
 Close                    48195     0.060   467.062
 LockX                      256     0.003     0.029
 Rename                    2324     0.026     0.360
 ReadX                    78846     0.004     0.504
 WriteX                   66832   562.775  1467.037
 Unlink                    5516     3.665  1141.740
 UnlockX                    256     0.002     0.019
 FIND_FIRST               16428     0.015     0.313
 SET_FILE_INFORMATION      6400     0.009     0.520
 QUERY_FILE_INFORMATION   17865     0.003     0.089
 QUERY_PATH_INFORMATION   47060     0.078   416.299
 QUERY_FS_INFORMATION      7024     0.004     0.032
 NTCreateX                55921     0.854  1141.452

Throughput 11.744 MB/sec (sync open)  128 clients  128 procs  max_latency=1467.041 ms

The test was done on a raid1 device backed by two rotational disks.

V5: V4 is more complicated than the version using a memory pool, so revert
    to the memory pool version.
V4: Use the address of fbio as a hash to choose a free flush_info.
V3: Shaohua suggested that a mempool is overkill. In v3 the memory is
    allocated when the raid device is created, and a simple bitmap records
    which resources are free. Also fix a bug from v2: flush_pending must be
    set to 1 first.
V2: Neil pointed out two problems: a counting error, and the return value
    when memory allocation fails.
    1. Counting error: it is only safe to call rdev_dec_pending() on rdevs
       for which atomic_inc(&rdev->nr_pending) was previously called. If an
       rdev is added to the list between the start and end of the flush,
       this will do something bad. bio_chain is no longer used; each flush
       bio now gets its own completion callback.
    2. Returning an IO error when kmalloc fails is wrong; the mempool
       suggested by Neil is used instead.
    3. Fixed some places pointed out by Guoqing.

Suggested-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Xiao Ni <xni@redhat.com>
Signed-off-by: Shaohua Li <shli@fb.com>
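For reference, the counting scheme the patch relies on (flush_pending starts at 1, is incremented once per per-device flush bio, and the submitter drops the initial reference last, so completion can only fire after everything is in flight) can be sketched in isolation. The snippet below is a minimal userspace analogue using C11 atomics, not kernel code; the names flush_ctx, child_complete and flush_done are illustrative stand-ins for flush_info, md_end_flush and the bio_endio()/queue_work() step.

/*
 * Userspace analogue of the flush_pending counting scheme (illustrative
 * only -- not kernel code). The counter starts at 1 so completion cannot
 * fire while child flushes are still being submitted; the submitter drops
 * its own reference last.
 */
#include <stdatomic.h>
#include <stdio.h>

struct flush_ctx {
        atomic_int pending;     /* plays the role of fi->flush_pending */
        int done;               /* set when the whole flush completes */
};

static void flush_done(struct flush_ctx *ctx)
{
        ctx->done = 1;          /* kernel code would bio_endio() or queue work here */
}

/* called once per completed per-device flush, like md_end_flush() */
static void child_complete(struct flush_ctx *ctx)
{
        if (atomic_fetch_sub(&ctx->pending, 1) == 1)
                flush_done(ctx);    /* last reference dropped */
}

int main(void)
{
        struct flush_ctx ctx = { .pending = 1, .done = 0 };
        int nr_children = 3;

        /* submit one child flush per member device */
        for (int i = 0; i < nr_children; i++)
                atomic_fetch_add(&ctx.pending, 1);

        /* children complete (on other CPUs in the real driver) */
        for (int i = 0; i < nr_children; i++)
                child_complete(&ctx);

        /* submitter drops its initial reference; this is the last put */
        child_complete(&ctx);

        printf("flush complete: %d\n", ctx.done);
        return 0;
}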
-rw-r--r--  drivers/md/md.c  | 158
-rw-r--r--  drivers/md/md.h  |  22
2 files changed, 120 insertions(+), 60 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index df9eb1a04f26..6b4e2f29fe4e 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -130,6 +130,24 @@ static inline int speed_max(struct mddev *mddev)
                mddev->sync_speed_max : sysctl_speed_limit_max;
 }
 
+static void * flush_info_alloc(gfp_t gfp_flags, void *data)
+{
+        return kzalloc(sizeof(struct flush_info), gfp_flags);
+}
+static void flush_info_free(void *flush_info, void *data)
+{
+        kfree(flush_info);
+}
+
+static void * flush_bio_alloc(gfp_t gfp_flags, void *data)
+{
+        return kzalloc(sizeof(struct flush_bio), gfp_flags);
+}
+static void flush_bio_free(void *flush_bio, void *data)
+{
+        kfree(flush_bio);
+}
+
 static struct ctl_table_header *raid_table_header;
 
 static struct ctl_table raid_table[] = {
@@ -412,30 +430,53 @@ static int md_congested(void *data, int bits)
 /*
  * Generic flush handling for md
  */
+static void submit_flushes(struct work_struct *ws)
+{
+        struct flush_info *fi = container_of(ws, struct flush_info, flush_work);
+        struct mddev *mddev = fi->mddev;
+        struct bio *bio = fi->bio;
+
+        bio->bi_opf &= ~REQ_PREFLUSH;
+        md_handle_request(mddev, bio);
+
+        mempool_free(fi, mddev->flush_pool);
+}
 
-static void md_end_flush(struct bio *bio)
+static void md_end_flush(struct bio *fbio)
 {
-        struct md_rdev *rdev = bio->bi_private;
-        struct mddev *mddev = rdev->mddev;
+        struct flush_bio *fb = fbio->bi_private;
+        struct md_rdev *rdev = fb->rdev;
+        struct flush_info *fi = fb->fi;
+        struct bio *bio = fi->bio;
+        struct mddev *mddev = fi->mddev;
 
         rdev_dec_pending(rdev, mddev);
 
-        if (atomic_dec_and_test(&mddev->flush_pending)) {
-                /* The pre-request flush has finished */
-                queue_work(md_wq, &mddev->flush_work);
+        if (atomic_dec_and_test(&fi->flush_pending)) {
+                if (bio->bi_iter.bi_size == 0)
+                        /* an empty barrier - all done */
+                        bio_endio(bio);
+                else {
+                        INIT_WORK(&fi->flush_work, submit_flushes);
+                        queue_work(md_wq, &fi->flush_work);
+                }
         }
-        bio_put(bio);
-}
 
-static void md_submit_flush_data(struct work_struct *ws);
+        mempool_free(fb, mddev->flush_bio_pool);
+        bio_put(fbio);
+}
 
-static void submit_flushes(struct work_struct *ws)
+void md_flush_request(struct mddev *mddev, struct bio *bio)
 {
-        struct mddev *mddev = container_of(ws, struct mddev, flush_work);
         struct md_rdev *rdev;
+        struct flush_info *fi;
+
+        fi = mempool_alloc(mddev->flush_pool, GFP_NOIO);
+
+        fi->bio = bio;
+        fi->mddev = mddev;
+        atomic_set(&fi->flush_pending, 1);
 
-        INIT_WORK(&mddev->flush_work, md_submit_flush_data);
-        atomic_set(&mddev->flush_pending, 1);
         rcu_read_lock();
         rdev_for_each_rcu(rdev, mddev)
                 if (rdev->raid_disk >= 0 &&
@@ -445,59 +486,39 @@ static void submit_flushes(struct work_struct *ws)
                         * we reclaim rcu_read_lock
                         */
                        struct bio *bi;
+                       struct flush_bio *fb;
                        atomic_inc(&rdev->nr_pending);
                        atomic_inc(&rdev->nr_pending);
                        rcu_read_unlock();
+
+                       fb = mempool_alloc(mddev->flush_bio_pool, GFP_NOIO);
+                       fb->fi = fi;
+                       fb->rdev = rdev;
+
                        bi = bio_alloc_mddev(GFP_NOIO, 0, mddev);
-                       bi->bi_end_io = md_end_flush;
-                       bi->bi_private = rdev;
                        bio_set_dev(bi, rdev->bdev);
+                       bi->bi_end_io = md_end_flush;
+                       bi->bi_private = fb;
                        bi->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
-                       atomic_inc(&mddev->flush_pending);
+
+                       atomic_inc(&fi->flush_pending);
                        submit_bio(bi);
+
                        rcu_read_lock();
                        rdev_dec_pending(rdev, mddev);
                }
        rcu_read_unlock();
-       if (atomic_dec_and_test(&mddev->flush_pending))
-               queue_work(md_wq, &mddev->flush_work);
-}
-
-static void md_submit_flush_data(struct work_struct *ws)
-{
-       struct mddev *mddev = container_of(ws, struct mddev, flush_work);
-       struct bio *bio = mddev->flush_bio;
 
-       /*
-        * must reset flush_bio before calling into md_handle_request to avoid a
-        * deadlock, because other bios passed md_handle_request suspend check
-        * could wait for this and below md_handle_request could wait for those
-        * bios because of suspend check
-        */
-       mddev->flush_bio = NULL;
-       wake_up(&mddev->sb_wait);
-
-       if (bio->bi_iter.bi_size == 0)
-               /* an empty barrier - all done */
-               bio_endio(bio);
-       else {
-               bio->bi_opf &= ~REQ_PREFLUSH;
-               md_handle_request(mddev, bio);
+       if (atomic_dec_and_test(&fi->flush_pending)) {
+               if (bio->bi_iter.bi_size == 0)
+                       /* an empty barrier - all done */
+                       bio_endio(bio);
+               else {
+                       INIT_WORK(&fi->flush_work, submit_flushes);
+                       queue_work(md_wq, &fi->flush_work);
+               }
        }
 }
-
-void md_flush_request(struct mddev *mddev, struct bio *bio)
-{
-       spin_lock_irq(&mddev->lock);
-       wait_event_lock_irq(mddev->sb_wait,
-                           !mddev->flush_bio,
-                           mddev->lock);
-       mddev->flush_bio = bio;
-       spin_unlock_irq(&mddev->lock);
-
-       INIT_WORK(&mddev->flush_work, submit_flushes);
-       queue_work(md_wq, &mddev->flush_work);
-}
 EXPORT_SYMBOL(md_flush_request);
 
 static inline struct mddev *mddev_get(struct mddev *mddev)
@@ -555,7 +576,6 @@ void mddev_init(struct mddev *mddev)
        atomic_set(&mddev->openers, 0);
        atomic_set(&mddev->active_io, 0);
        spin_lock_init(&mddev->lock);
-       atomic_set(&mddev->flush_pending, 0);
        init_waitqueue_head(&mddev->sb_wait);
        init_waitqueue_head(&mddev->recovery_wait);
        mddev->reshape_position = MaxSector;
@@ -5510,6 +5530,22 @@ int md_run(struct mddev *mddev)
                        goto abort;
                }
        }
+       if (mddev->flush_pool == NULL) {
+               mddev->flush_pool = mempool_create(NR_FLUSH_INFOS, flush_info_alloc,
+                                                  flush_info_free, mddev);
+               if (!mddev->flush_pool) {
+                       err = -ENOMEM;
+                       goto abort;
+               }
+       }
+       if (mddev->flush_bio_pool == NULL) {
+               mddev->flush_bio_pool = mempool_create(NR_FLUSH_BIOS, flush_bio_alloc,
+                                                      flush_bio_free, mddev);
+               if (!mddev->flush_bio_pool) {
+                       err = -ENOMEM;
+                       goto abort;
+               }
+       }
 
        spin_lock(&pers_lock);
        pers = find_pers(mddev->level, mddev->clevel);
@@ -5669,6 +5705,14 @@ int md_run(struct mddev *mddev)
        return 0;
 
 abort:
+       if (mddev->flush_bio_pool) {
+               mempool_destroy(mddev->flush_bio_pool);
+               mddev->flush_bio_pool = NULL;
+       }
+       if (mddev->flush_pool){
+               mempool_destroy(mddev->flush_pool);
+               mddev->flush_pool = NULL;
+       }
        if (mddev->bio_set) {
                bioset_free(mddev->bio_set);
                mddev->bio_set = NULL;
@@ -5889,6 +5933,14 @@ void md_stop(struct mddev *mddev)
         * This is called from dm-raid
         */
        __md_stop(mddev);
+       if (mddev->flush_bio_pool) {
+               mempool_destroy(mddev->flush_bio_pool);
+               mddev->flush_bio_pool = NULL;
+       }
+       if (mddev->flush_pool) {
+               mempool_destroy(mddev->flush_pool);
+               mddev->flush_pool = NULL;
+       }
        if (mddev->bio_set) {
                bioset_free(mddev->bio_set);
                mddev->bio_set = NULL;
diff --git a/drivers/md/md.h b/drivers/md/md.h
index fbc925cce810..ffcb1ae217fe 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -252,6 +252,19 @@ enum mddev_sb_flags {
        MD_SB_NEED_REWRITE,     /* metadata write needs to be repeated */
 };
 
+#define NR_FLUSH_INFOS 8
+#define NR_FLUSH_BIOS 64
+struct flush_info {
+       struct bio              *bio;
+       struct mddev            *mddev;
+       struct work_struct      flush_work;
+       atomic_t                flush_pending;
+};
+struct flush_bio {
+       struct flush_info *fi;
+       struct md_rdev *rdev;
+};
+
 struct mddev {
        void                            *private;
        struct md_personality           *pers;
@@ -457,13 +470,8 @@ struct mddev {
                                           * metadata and bitmap writes
                                           */
 
-       /* Generic flush handling.
-        * The last to finish preflush schedules a worker to submit
-        * the rest of the request (without the REQ_PREFLUSH flag).
-        */
-       struct bio *flush_bio;
-       atomic_t flush_pending;
-       struct work_struct flush_work;
+       mempool_t *flush_pool;
+       mempool_t *flush_bio_pool;
        struct work_struct event_work;  /* used by dm to report failure event */
        void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev);
        struct md_cluster_info          *cluster_info;