Diffstat (limited to 'block/blk-throttle.c')
 block/blk-throttle.c | 139 ++++++++++++++++++++++++-------------------------
 1 file changed, 71 insertions(+), 68 deletions(-)
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index e36cc10a346c..5352bdafbcf0 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -102,7 +102,7 @@ struct throtl_data
 	/* Work for dispatching throttled bios */
 	struct delayed_work throtl_work;
 
-	atomic_t limits_changed;
+	bool limits_changed;
 };
 
 enum tg_state_flags {
@@ -201,6 +201,7 @@ static struct throtl_grp * throtl_find_alloc_tg(struct throtl_data *td,
 	RB_CLEAR_NODE(&tg->rb_node);
 	bio_list_init(&tg->bio_lists[0]);
 	bio_list_init(&tg->bio_lists[1]);
+	td->limits_changed = false;
 
 	/*
 	 * Take the initial reference that will be released on destroy
@@ -737,34 +738,36 @@ static void throtl_process_limit_change(struct throtl_data *td)
 	struct throtl_grp *tg;
 	struct hlist_node *pos, *n;
 
-	if (!atomic_read(&td->limits_changed))
+	if (!td->limits_changed)
 		return;
 
-	throtl_log(td, "limit changed =%d", atomic_read(&td->limits_changed));
+	xchg(&td->limits_changed, false);
 
-	/*
-	 * Make sure updates from throtl_update_blkio_group_read_bps() group
-	 * of functions to tg->limits_changed are visible. We do not
-	 * want update td->limits_changed to be visible but update to
-	 * tg->limits_changed not being visible yet on this cpu. Hence
-	 * the read barrier.
-	 */
-	smp_rmb();
+	throtl_log(td, "limits changed");
 
 	hlist_for_each_entry_safe(tg, pos, n, &td->tg_list, tg_node) {
-		if (throtl_tg_on_rr(tg) && tg->limits_changed) {
-			throtl_log_tg(td, tg, "limit change rbps=%llu wbps=%llu"
-				" riops=%u wiops=%u", tg->bps[READ],
-				tg->bps[WRITE], tg->iops[READ],
-				tg->iops[WRITE]);
+		if (!tg->limits_changed)
+			continue;
+
+		if (!xchg(&tg->limits_changed, false))
+			continue;
+
+		throtl_log_tg(td, tg, "limit change rbps=%llu wbps=%llu"
+			" riops=%u wiops=%u", tg->bps[READ], tg->bps[WRITE],
+			tg->iops[READ], tg->iops[WRITE]);
+
+		/*
+		 * Restart the slices for both READ and WRITES. It
+		 * might happen that a group's limits are dropped
+		 * suddenly and we don't want to account recently
+		 * dispatched IO with the new low rate
+		 */
+		throtl_start_new_slice(td, tg, 0);
+		throtl_start_new_slice(td, tg, 1);
+
+		if (throtl_tg_on_rr(tg))
 			tg_update_disptime(td, tg);
-			tg->limits_changed = false;
-		}
 	}
-
-	smp_mb__before_atomic_dec();
-	atomic_dec(&td->limits_changed);
-	smp_mb__after_atomic_dec();
 }
 
 /* Dispatch throttled bios. Should be called without queue lock held. */
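The xchg() calls above do double duty: they atomically test-and-clear the flag, so an update that races in between the test and the clear is not lost, and, being fully ordered, they replace the explicit smp_rmb()/smp_mb() choreography of the old atomic_t code. A minimal userspace sketch of the test-and-clear half, using C11 atomics (update_limit and process_limit_change are illustrative names, not kernel API):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool limits_changed;	/* stands in for tg->limits_changed */

static void update_limit(void)
{
	/* writer: raise the flag after publishing the new limit */
	atomic_exchange(&limits_changed, true);
}

static void process_limit_change(void)
{
	/*
	 * reader: atomically test-and-clear. A plain "if (flag) flag = false"
	 * could wipe out a change that raced in between the test and the
	 * store; the exchange returns the old value and clears in one step.
	 */
	if (!atomic_exchange(&limits_changed, false))
		return;
	printf("processing limit change\n");
}

int main(void)
{
	update_limit();
	process_limit_change();	/* consumes the change */
	process_limit_change();	/* no-op: flag already cleared */
	return 0;
}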
@@ -774,6 +777,7 @@ static int throtl_dispatch(struct request_queue *q)
 	unsigned int nr_disp = 0;
 	struct bio_list bio_list_on_stack;
 	struct bio *bio;
+	struct blk_plug plug;
 
 	spin_lock_irq(q->queue_lock);
 
@@ -802,9 +806,10 @@ out:
 	 * immediate dispatch
 	 */
 	if (nr_disp) {
+		blk_start_plug(&plug);
 		while((bio = bio_list_pop(&bio_list_on_stack)))
 			generic_make_request(bio);
-		blk_unplug(q);
+		blk_finish_plug(&plug);
 	}
 	return nr_disp;
 }
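blk_unplug() operated on the request queue; the 2.6.39 plugging rework replaced it with an on-stack, per-task plug that batches submissions and flushes them to the driver in one go when the plug is finished. A hedged sketch of the usage pattern (submit_queued_bios is a hypothetical caller, not kernel API):

#include <linux/bio.h>
#include <linux/blkdev.h>

/*
 * Everything submitted between blk_start_plug() and blk_finish_plug()
 * is held on a per-task list and pushed out as one batch at finish time.
 */
static void submit_queued_bios(struct bio_list *list)
{
	struct blk_plug plug;
	struct bio *bio;

	blk_start_plug(&plug);
	while ((bio = bio_list_pop(list)))
		generic_make_request(bio);
	blk_finish_plug(&plug);
}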
@@ -825,7 +830,8 @@ throtl_schedule_delayed_work(struct throtl_data *td, unsigned long delay)
 
 	struct delayed_work *dwork = &td->throtl_work;
 
-	if (total_nr_queued(td) > 0) {
+	/* schedule work if limits changed even if no bio is queued */
+	if (total_nr_queued(td) > 0 || td->limits_changed) {
 		/*
 		 * We might have a work scheduled to be executed in future.
 		 * Cancel that and schedule a new one.
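The comment in this hunk refers to the standard cancel-and-requeue idiom for delayed work: a work item already queued with a long delay must be cancelled before it can be rearmed with delay 0, otherwise the earlier request waits behind the later timer. A sketch of that idiom using the generic workqueue API (requeue_throtl_work is an illustrative name):

#include <linux/workqueue.h>

/*
 * Rearm a delayed work with a new, possibly shorter, delay.
 * cancel_delayed_work() drops the pending timer if the work has not
 * started running; schedule_delayed_work() then queues it afresh.
 */
static void requeue_throtl_work(struct delayed_work *dwork,
				unsigned long delay)
{
	cancel_delayed_work(dwork);
	schedule_delayed_work(dwork, delay);
}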
@@ -898,6 +904,15 @@ void throtl_unlink_blkio_group(void *key, struct blkio_group *blkg)
 	spin_unlock_irqrestore(td->queue->queue_lock, flags);
 }
 
+static void throtl_update_blkio_group_common(struct throtl_data *td,
+				struct throtl_grp *tg)
+{
+	xchg(&tg->limits_changed, true);
+	xchg(&td->limits_changed, true);
+	/* Schedule a work now to process the limit change */
+	throtl_schedule_delayed_work(td, 0);
+}
+
 /*
  * For all update functions, key should be a valid pointer because these
  * update functions are called under blkcg_lock, that means, blkg is
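On the update side the ordering still matters: the per-group flag must become visible before the global one, or the worker could observe td->limits_changed, clear it, and then find no group whose limits changed. xchg() is fully ordered, so the two calls above give the same guarantee the old smp_wmb()/smp_mb__*_atomic_inc() pairs did. A userspace sketch of that pairing in C11 atomics (names are illustrative, not kernel API):

#include <stdatomic.h>
#include <stdbool.h>

static atomic_bool group_changed;	/* plays tg->limits_changed */
static atomic_bool global_changed;	/* plays td->limits_changed */

static void publish_limit_change(void)
{
	/*
	 * Sequentially consistent exchanges: any thread that sees
	 * global_changed == true is guaranteed to also see
	 * group_changed == true, so the worker cannot consume the
	 * global flag and miss the group that raised it.
	 */
	atomic_exchange(&group_changed, true);
	atomic_exchange(&global_changed, true);
}

int main(void)
{
	publish_limit_change();
	return !(atomic_load(&group_changed) && atomic_load(&global_changed));
}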
@@ -911,64 +926,43 @@ static void throtl_update_blkio_group_read_bps(void *key,
 		struct blkio_group *blkg, u64 read_bps)
 {
 	struct throtl_data *td = key;
+	struct throtl_grp *tg = tg_of_blkg(blkg);
 
-	tg_of_blkg(blkg)->bps[READ] = read_bps;
-	/* Make sure read_bps is updated before setting limits_changed */
-	smp_wmb();
-	tg_of_blkg(blkg)->limits_changed = true;
-
-	/* Make sure tg->limits_changed is updated before td->limits_changed */
-	smp_mb__before_atomic_inc();
-	atomic_inc(&td->limits_changed);
-	smp_mb__after_atomic_inc();
-
-	/* Schedule a work now to process the limit change */
-	throtl_schedule_delayed_work(td, 0);
+	tg->bps[READ] = read_bps;
+	throtl_update_blkio_group_common(td, tg);
 }
 
 static void throtl_update_blkio_group_write_bps(void *key,
 		struct blkio_group *blkg, u64 write_bps)
 {
 	struct throtl_data *td = key;
+	struct throtl_grp *tg = tg_of_blkg(blkg);
 
-	tg_of_blkg(blkg)->bps[WRITE] = write_bps;
-	smp_wmb();
-	tg_of_blkg(blkg)->limits_changed = true;
-	smp_mb__before_atomic_inc();
-	atomic_inc(&td->limits_changed);
-	smp_mb__after_atomic_inc();
-	throtl_schedule_delayed_work(td, 0);
+	tg->bps[WRITE] = write_bps;
+	throtl_update_blkio_group_common(td, tg);
 }
 
 static void throtl_update_blkio_group_read_iops(void *key,
 		struct blkio_group *blkg, unsigned int read_iops)
 {
 	struct throtl_data *td = key;
+	struct throtl_grp *tg = tg_of_blkg(blkg);
 
-	tg_of_blkg(blkg)->iops[READ] = read_iops;
-	smp_wmb();
-	tg_of_blkg(blkg)->limits_changed = true;
-	smp_mb__before_atomic_inc();
-	atomic_inc(&td->limits_changed);
-	smp_mb__after_atomic_inc();
-	throtl_schedule_delayed_work(td, 0);
+	tg->iops[READ] = read_iops;
+	throtl_update_blkio_group_common(td, tg);
 }
 
 static void throtl_update_blkio_group_write_iops(void *key,
 		struct blkio_group *blkg, unsigned int write_iops)
 {
 	struct throtl_data *td = key;
+	struct throtl_grp *tg = tg_of_blkg(blkg);
 
-	tg_of_blkg(blkg)->iops[WRITE] = write_iops;
-	smp_wmb();
-	tg_of_blkg(blkg)->limits_changed = true;
-	smp_mb__before_atomic_inc();
-	atomic_inc(&td->limits_changed);
-	smp_mb__after_atomic_inc();
-	throtl_schedule_delayed_work(td, 0);
+	tg->iops[WRITE] = write_iops;
+	throtl_update_blkio_group_common(td, tg);
 }
 
-void throtl_shutdown_timer_wq(struct request_queue *q)
+static void throtl_shutdown_wq(struct request_queue *q)
 {
 	struct throtl_data *td = q->td;
 
@@ -1009,20 +1003,28 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop)
 		/*
 		 * There is already another bio queued in same dir. No
 		 * need to update dispatch time.
-		 * Still update the disptime if rate limits on this group
-		 * were changed.
 		 */
-		if (!tg->limits_changed)
-			update_disptime = false;
-		else
-			tg->limits_changed = false;
-
+		update_disptime = false;
 		goto queue_bio;
+
 	}
 
 	/* Bio is with-in rate limit of group */
 	if (tg_may_dispatch(td, tg, bio, NULL)) {
 		throtl_charge_bio(tg, bio);
+
+		/*
+		 * We need to trim slice even when bios are not being queued
+		 * otherwise it might happen that a bio is not queued for
+		 * a long time and slice keeps on extending and trim is not
+		 * called for a long time. Now if limits are reduced suddenly
+		 * we take into account all the IO dispatched so far at new
+		 * low rate and newly queued IO gets a really long dispatch
+		 * time.
+		 *
+		 * So keep on trimming slice even if bio is not queued.
+		 */
+		throtl_trim_slice(td, tg, rw);
 		goto out;
 	}
 
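The comment's scenario is easy to put numbers on: suppose a group dispatched 1 GiB during a long, never-trimmed slice, and the limit then drops to 10 MiB/s. If that old IO is charged at the new rate, the next bio's dispatch time is pushed out by roughly 100 seconds, which is exactly what trimming avoids. A back-of-the-envelope sketch (the constants are illustrative):

#include <stdio.h>

int main(void)
{
	/* IO already dispatched during the untrimmed slice */
	unsigned long long dispatched = 1024ULL * 1024 * 1024;	/* 1 GiB */
	/* the newly lowered limit */
	unsigned long long new_bps = 10ULL * 1024 * 1024;	/* 10 MiB/s */

	/* if the old IO is accounted at the new rate, the next bio waits: */
	printf("extra wait: ~%llu seconds\n", dispatched / new_bps);
	return 0;
}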
@@ -1058,7 +1060,7 @@ int blk_throtl_init(struct request_queue *q)
 
 	INIT_HLIST_HEAD(&td->tg_list);
 	td->tg_service_tree = THROTL_RB_ROOT;
-	atomic_set(&td->limits_changed, 0);
+	td->limits_changed = false;
 
 	/* Init root group */
 	tg = &td->root_tg;
@@ -1070,6 +1072,7 @@ int blk_throtl_init(struct request_queue *q)
 	/* Practically unlimited BW */
 	tg->bps[0] = tg->bps[1] = -1;
 	tg->iops[0] = tg->iops[1] = -1;
+	td->limits_changed = false;
 
 	/*
 	 * Set root group reference to 2. One reference will be dropped when
@@ -1102,7 +1105,7 @@ void blk_throtl_exit(struct request_queue *q)
 
 	BUG_ON(!td);
 
-	throtl_shutdown_timer_wq(q);
+	throtl_shutdown_wq(q);
 
 	spin_lock_irq(q->queue_lock);
 	throtl_release_tgs(td);
@@ -1132,7 +1135,7 @@ void blk_throtl_exit(struct request_queue *q)
 	 * update limits through cgroup and another work got queued, cancel
 	 * it.
 	 */
-	throtl_shutdown_timer_wq(q);
+	throtl_shutdown_wq(q);
 	throtl_td_free(td);
 }
 