Diffstat (limited to 'block/blk-throttle.c')
 block/blk-throttle.c | 139 ++++++++++++++++++++++++-------------------------
 1 file changed, 71 insertions(+), 68 deletions(-)
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index e36cc10a346c..5352bdafbcf0 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -102,7 +102,7 @@ struct throtl_data
 	/* Work for dispatching throttled bios */
 	struct delayed_work throtl_work;
 
-	atomic_t limits_changed;
+	bool limits_changed;
 };
 
 enum tg_state_flags {
@@ -201,6 +201,7 @@ static struct throtl_grp * throtl_find_alloc_tg(struct throtl_data *td,
 	RB_CLEAR_NODE(&tg->rb_node);
 	bio_list_init(&tg->bio_lists[0]);
 	bio_list_init(&tg->bio_lists[1]);
+	td->limits_changed = false;
 
 	/*
 	 * Take the initial reference that will be released on destroy
@@ -737,34 +738,36 @@ static void throtl_process_limit_change(struct throtl_data *td)
 	struct throtl_grp *tg;
 	struct hlist_node *pos, *n;
 
-	if (!atomic_read(&td->limits_changed))
+	if (!td->limits_changed)
 		return;
 
-	throtl_log(td, "limit changed =%d", atomic_read(&td->limits_changed));
+	xchg(&td->limits_changed, false);
 
-	/*
-	 * Make sure updates from throtl_update_blkio_group_read_bps() group
-	 * of functions to tg->limits_changed are visible. We do not
-	 * want update td->limits_changed to be visible but update to
-	 * tg->limits_changed not being visible yet on this cpu. Hence
-	 * the read barrier.
-	 */
-	smp_rmb();
+	throtl_log(td, "limits changed");
 
 	hlist_for_each_entry_safe(tg, pos, n, &td->tg_list, tg_node) {
-		if (throtl_tg_on_rr(tg) && tg->limits_changed) {
-			throtl_log_tg(td, tg, "limit change rbps=%llu wbps=%llu"
-				" riops=%u wiops=%u", tg->bps[READ],
-				tg->bps[WRITE], tg->iops[READ],
-				tg->iops[WRITE]);
+		if (!tg->limits_changed)
+			continue;
+
+		if (!xchg(&tg->limits_changed, false))
+			continue;
+
+		throtl_log_tg(td, tg, "limit change rbps=%llu wbps=%llu"
+			" riops=%u wiops=%u", tg->bps[READ], tg->bps[WRITE],
+			tg->iops[READ], tg->iops[WRITE]);
+
+		/*
+		 * Restart the slices for both READ and WRITES. It
+		 * might happen that a group's limits are dropped
+		 * suddenly and we don't want to account recently
+		 * dispatched IO with the new low rate
+		 */
+		throtl_start_new_slice(td, tg, 0);
+		throtl_start_new_slice(td, tg, 1);
+
+		if (throtl_tg_on_rr(tg))
 			tg_update_disptime(td, tg);
-			tg->limits_changed = false;
-		}
 	}
-
-	smp_mb__before_atomic_dec();
-	atomic_dec(&td->limits_changed);
-	smp_mb__after_atomic_dec();
 }
 
 /* Dispatch throttled bios. Should be called without queue lock held. */
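The xchg() calls above do double duty: they atomically test-and-clear the flag, so an update that races in between the test and the clear is not lost, and, being fully ordered, they replace the explicit smp_rmb()/smp_mb() choreography of the old atomic_t code. A minimal userspace sketch of the test-and-clear half, using C11 atomics (update_limit and process_limit_change are illustrative names, not kernel API):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool limits_changed;	/* stands in for tg->limits_changed */

static void update_limit(void)
{
	/* writer: raise the flag after publishing the new limit */
	atomic_exchange(&limits_changed, true);
}

static void process_limit_change(void)
{
	/*
	 * reader: atomically test-and-clear. A plain "if (flag) flag = false"
	 * could wipe out a change that raced in between the test and the
	 * store; the exchange returns the old value and clears in one step.
	 */
	if (!atomic_exchange(&limits_changed, false))
		return;
	printf("processing limit change\n");
}

int main(void)
{
	update_limit();
	process_limit_change();	/* consumes the change */
	process_limit_change();	/* no-op: flag already cleared */
	return 0;
}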
@@ -774,6 +777,7 @@ static int throtl_dispatch(struct request_queue *q)
 	unsigned int nr_disp = 0;
 	struct bio_list bio_list_on_stack;
 	struct bio *bio;
+	struct blk_plug plug;
 
 	spin_lock_irq(q->queue_lock);
 
@@ -802,9 +806,10 @@ out:
 	 * immediate dispatch
 	 */
 	if (nr_disp) {
+		blk_start_plug(&plug);
 		while((bio = bio_list_pop(&bio_list_on_stack)))
 			generic_make_request(bio);
-		blk_unplug(q);
+		blk_finish_plug(&plug);
 	}
 	return nr_disp;
 }
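blk_unplug() operated on the request queue; the 2.6.39 plugging rework replaced it with an on-stack, per-task plug that batches submissions and flushes them to the driver in one go when the plug is finished. A hedged sketch of the usage pattern (submit_queued_bios is a hypothetical caller, not kernel API):

#include <linux/bio.h>
#include <linux/blkdev.h>

/*
 * Everything submitted between blk_start_plug() and blk_finish_plug()
 * is held on a per-task list and pushed out as one batch at finish time.
 */
static void submit_queued_bios(struct bio_list *list)
{
	struct blk_plug plug;
	struct bio *bio;

	blk_start_plug(&plug);
	while ((bio = bio_list_pop(list)))
		generic_make_request(bio);
	blk_finish_plug(&plug);
}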
@@ -825,7 +830,8 @@ throtl_schedule_delayed_work(struct throtl_data *td, unsigned long delay)
 
 	struct delayed_work *dwork = &td->throtl_work;
 
-	if (total_nr_queued(td) > 0) {
+	/* schedule work if limits changed even if no bio is queued */
+	if (total_nr_queued(td) > 0 || td->limits_changed) {
 		/*
 		 * We might have a work scheduled to be executed in future.
 		 * Cancel that and schedule a new one.
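The comment in this hunk refers to the standard cancel-and-requeue idiom for delayed work: a work item already queued with a long delay must be cancelled before it can be rearmed with delay 0, otherwise the earlier request waits behind the later timer. A sketch of that idiom using the generic workqueue API (requeue_throtl_work is an illustrative name):

#include <linux/workqueue.h>

/*
 * Rearm a delayed work with a new, possibly shorter, delay.
 * cancel_delayed_work() drops the pending timer if the work has not
 * started running; schedule_delayed_work() then queues it afresh.
 */
static void requeue_throtl_work(struct delayed_work *dwork,
				unsigned long delay)
{
	cancel_delayed_work(dwork);
	schedule_delayed_work(dwork, delay);
}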
@@ -898,6 +904,15 @@ void throtl_unlink_blkio_group(void *key, struct blkio_group *blkg)
 	spin_unlock_irqrestore(td->queue->queue_lock, flags);
 }
 
+static void throtl_update_blkio_group_common(struct throtl_data *td,
+				struct throtl_grp *tg)
+{
+	xchg(&tg->limits_changed, true);
+	xchg(&td->limits_changed, true);
+	/* Schedule a work now to process the limit change */
+	throtl_schedule_delayed_work(td, 0);
+}
+
 /*
  * For all update functions, key should be a valid pointer because these
  * update functions are called under blkcg_lock, that means, blkg is
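On the update side the ordering still matters: the per-group flag must become visible before the global one, or the worker could observe td->limits_changed, clear it, and then find no group whose limits changed. xchg() is fully ordered, so the two calls above give the same guarantee the old smp_wmb()/smp_mb__*_atomic_inc() pairs did. A userspace sketch of that pairing in C11 atomics (names are illustrative, not kernel API):

#include <stdatomic.h>
#include <stdbool.h>

static atomic_bool group_changed;	/* plays tg->limits_changed */
static atomic_bool global_changed;	/* plays td->limits_changed */

static void publish_limit_change(void)
{
	/*
	 * Sequentially consistent exchanges: any thread that sees
	 * global_changed == true is guaranteed to also see
	 * group_changed == true, so the worker cannot consume the
	 * global flag and miss the group that raised it.
	 */
	atomic_exchange(&group_changed, true);
	atomic_exchange(&global_changed, true);
}

int main(void)
{
	publish_limit_change();
	return !(atomic_load(&group_changed) && atomic_load(&global_changed));
}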
@@ -911,64 +926,43 @@ static void throtl_update_blkio_group_read_bps(void *key,
 		struct blkio_group *blkg, u64 read_bps)
 {
 	struct throtl_data *td = key;
+	struct throtl_grp *tg = tg_of_blkg(blkg);
 
-	tg_of_blkg(blkg)->bps[READ] = read_bps;
-	/* Make sure read_bps is updated before setting limits_changed */
-	smp_wmb();
-	tg_of_blkg(blkg)->limits_changed = true;
-
-	/* Make sure tg->limits_changed is updated before td->limits_changed */
-	smp_mb__before_atomic_inc();
-	atomic_inc(&td->limits_changed);
-	smp_mb__after_atomic_inc();
-
-	/* Schedule a work now to process the limit change */
-	throtl_schedule_delayed_work(td, 0);
+	tg->bps[READ] = read_bps;
+	throtl_update_blkio_group_common(td, tg);
 }
 
 static void throtl_update_blkio_group_write_bps(void *key,
 		struct blkio_group *blkg, u64 write_bps)
 {
 	struct throtl_data *td = key;
+	struct throtl_grp *tg = tg_of_blkg(blkg);
 
-	tg_of_blkg(blkg)->bps[WRITE] = write_bps;
-	smp_wmb();
-	tg_of_blkg(blkg)->limits_changed = true;
-	smp_mb__before_atomic_inc();
-	atomic_inc(&td->limits_changed);
-	smp_mb__after_atomic_inc();
-	throtl_schedule_delayed_work(td, 0);
+	tg->bps[WRITE] = write_bps;
+	throtl_update_blkio_group_common(td, tg);
 }
 
 static void throtl_update_blkio_group_read_iops(void *key,
 		struct blkio_group *blkg, unsigned int read_iops)
 {
 	struct throtl_data *td = key;
+	struct throtl_grp *tg = tg_of_blkg(blkg);
 
-	tg_of_blkg(blkg)->iops[READ] = read_iops;
-	smp_wmb();
-	tg_of_blkg(blkg)->limits_changed = true;
-	smp_mb__before_atomic_inc();
-	atomic_inc(&td->limits_changed);
-	smp_mb__after_atomic_inc();
-	throtl_schedule_delayed_work(td, 0);
+	tg->iops[READ] = read_iops;
+	throtl_update_blkio_group_common(td, tg);
 }
 
 static void throtl_update_blkio_group_write_iops(void *key,
 		struct blkio_group *blkg, unsigned int write_iops)
 {
 	struct throtl_data *td = key;
+	struct throtl_grp *tg = tg_of_blkg(blkg);
 
-	tg_of_blkg(blkg)->iops[WRITE] = write_iops;
-	smp_wmb();
-	tg_of_blkg(blkg)->limits_changed = true;
-	smp_mb__before_atomic_inc();
-	atomic_inc(&td->limits_changed);
-	smp_mb__after_atomic_inc();
-	throtl_schedule_delayed_work(td, 0);
+	tg->iops[WRITE] = write_iops;
+	throtl_update_blkio_group_common(td, tg);
 }
 
-void throtl_shutdown_timer_wq(struct request_queue *q)
+static void throtl_shutdown_wq(struct request_queue *q)
 {
 	struct throtl_data *td = q->td;
 
@@ -1009,20 +1003,28 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop)
 		/*
 		 * There is already another bio queued in same dir. No
 		 * need to update dispatch time.
-		 * Still update the disptime if rate limits on this group
-		 * were changed.
 		 */
-		if (!tg->limits_changed)
-			update_disptime = false;
-		else
-			tg->limits_changed = false;
-
+		update_disptime = false;
 		goto queue_bio;
+
 	}
 
 	/* Bio is with-in rate limit of group */
 	if (tg_may_dispatch(td, tg, bio, NULL)) {
 		throtl_charge_bio(tg, bio);
+
+		/*
+		 * We need to trim slice even when bios are not being queued
+		 * otherwise it might happen that a bio is not queued for
+		 * a long time and slice keeps on extending and trim is not
+		 * called for a long time. Now if limits are reduced suddenly
+		 * we take into account all the IO dispatched so far at new
+		 * low rate and newly queued IO gets a really long dispatch
+		 * time.
+		 *
+		 * So keep on trimming slice even if bio is not queued.
+		 */
+		throtl_trim_slice(td, tg, rw);
 		goto out;
 	}
 
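The comment's scenario is easy to put numbers on: suppose a group dispatched 1 GiB during a long, never-trimmed slice, and the limit then drops to 10 MiB/s. If that old IO is charged at the new rate, the next bio's dispatch time is pushed out by roughly 100 seconds, which is exactly what trimming avoids. A back-of-the-envelope sketch (the constants are illustrative):

#include <stdio.h>

int main(void)
{
	/* IO already dispatched during the untrimmed slice */
	unsigned long long dispatched = 1024ULL * 1024 * 1024;	/* 1 GiB */
	/* the newly lowered limit */
	unsigned long long new_bps = 10ULL * 1024 * 1024;	/* 10 MiB/s */

	/* if the old IO is accounted at the new rate, the next bio waits: */
	printf("extra wait: ~%llu seconds\n", dispatched / new_bps);
	return 0;
}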
@@ -1058,7 +1060,7 @@ int blk_throtl_init(struct request_queue *q)
 
 	INIT_HLIST_HEAD(&td->tg_list);
 	td->tg_service_tree = THROTL_RB_ROOT;
-	atomic_set(&td->limits_changed, 0);
+	td->limits_changed = false;
 
 	/* Init root group */
 	tg = &td->root_tg;
@@ -1070,6 +1072,7 @@ int blk_throtl_init(struct request_queue *q)
 	/* Practically unlimited BW */
 	tg->bps[0] = tg->bps[1] = -1;
 	tg->iops[0] = tg->iops[1] = -1;
+	td->limits_changed = false;
 
 	/*
 	 * Set root group reference to 2. One reference will be dropped when
@@ -1102,7 +1105,7 @@ void blk_throtl_exit(struct request_queue *q)
 
 	BUG_ON(!td);
 
-	throtl_shutdown_timer_wq(q);
+	throtl_shutdown_wq(q);
 
 	spin_lock_irq(q->queue_lock);
 	throtl_release_tgs(td);
@@ -1132,7 +1135,7 @@ void blk_throtl_exit(struct request_queue *q)
 	 * update limits through cgroup and another work got queued, cancel
 	 * it.
 	 */
-	throtl_shutdown_timer_wq(q);
+	throtl_shutdown_wq(q);
 	throtl_td_free(td);
 }
 