aboutsummaryrefslogtreecommitdiffstats
path: root/block
diff options
context:
space:
mode:
authorVivek Goyal <vgoyal@redhat.com>2010-10-01 08:49:49 -0400
committerJens Axboe <jaxboe@fusionio.com>2010-10-01 08:49:49 -0400
commitfe0714377ee2ca161bf2afb7773e22f15f1786d4 (patch)
tree09f5e8686d741d012333c92251b8cc66793ef916 /block
parent02977e4af7ed3b478c505e50491ffdf3e1314cf4 (diff)
blkio: Recalculate the throttled bio dispatch time upon throttle limit change
o Currently any cgroup throttle limit changes are processed asynchronousy and the change does not take affect till a new bio is dispatched from same group. o It might happen that a user sets a redicuously low limit on throttling. Say 1 bytes per second on reads. In such cases simple operations like mount a disk can wait for a very long time. o Once bio is throttled, there is no easy way to come out of that wait even if user increases the read limit later. o This patch fixes it. Now if a user changes the cgroup limits, we recalculate the bio dispatch time according to new limits. o Can't take queueu lock under blkcg_lock, hence after the change I wake up the dispatch thread again which recalculates the time. So there are some variables being synchronized across two threads without lock and I had to make use of barriers. Hoping I have used barriers correctly. Any review of memory barrier code especially will help. Signed-off-by: Vivek Goyal <vgoyal@redhat.com> Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
Diffstat (limited to 'block')
-rw-r--r--block/blk-cgroup.c15
-rw-r--r--block/blk-cgroup.h21
-rw-r--r--block/blk-throttle.c134
-rw-r--r--block/cfq-iosched.c4
4 files changed, 139 insertions, 35 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index b06ca70354e3..52c12130a5de 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -124,7 +124,8 @@ blkio_update_group_weight(struct blkio_group *blkg, unsigned int weight)
124 if (blkiop->plid != blkg->plid) 124 if (blkiop->plid != blkg->plid)
125 continue; 125 continue;
126 if (blkiop->ops.blkio_update_group_weight_fn) 126 if (blkiop->ops.blkio_update_group_weight_fn)
127 blkiop->ops.blkio_update_group_weight_fn(blkg, weight); 127 blkiop->ops.blkio_update_group_weight_fn(blkg->key,
128 blkg, weight);
128 } 129 }
129} 130}
130 131
@@ -141,11 +142,13 @@ static inline void blkio_update_group_bps(struct blkio_group *blkg, u64 bps,
141 142
142 if (fileid == BLKIO_THROTL_read_bps_device 143 if (fileid == BLKIO_THROTL_read_bps_device
143 && blkiop->ops.blkio_update_group_read_bps_fn) 144 && blkiop->ops.blkio_update_group_read_bps_fn)
144 blkiop->ops.blkio_update_group_read_bps_fn(blkg, bps); 145 blkiop->ops.blkio_update_group_read_bps_fn(blkg->key,
146 blkg, bps);
145 147
146 if (fileid == BLKIO_THROTL_write_bps_device 148 if (fileid == BLKIO_THROTL_write_bps_device
147 && blkiop->ops.blkio_update_group_write_bps_fn) 149 && blkiop->ops.blkio_update_group_write_bps_fn)
148 blkiop->ops.blkio_update_group_write_bps_fn(blkg, bps); 150 blkiop->ops.blkio_update_group_write_bps_fn(blkg->key,
151 blkg, bps);
149 } 152 }
150} 153}
151 154
@@ -162,11 +165,13 @@ static inline void blkio_update_group_iops(struct blkio_group *blkg,
162 165
163 if (fileid == BLKIO_THROTL_read_iops_device 166 if (fileid == BLKIO_THROTL_read_iops_device
164 && blkiop->ops.blkio_update_group_read_iops_fn) 167 && blkiop->ops.blkio_update_group_read_iops_fn)
165 blkiop->ops.blkio_update_group_read_iops_fn(blkg, iops); 168 blkiop->ops.blkio_update_group_read_iops_fn(blkg->key,
169 blkg, iops);
166 170
167 if (fileid == BLKIO_THROTL_write_iops_device 171 if (fileid == BLKIO_THROTL_write_iops_device
168 && blkiop->ops.blkio_update_group_write_iops_fn) 172 && blkiop->ops.blkio_update_group_write_iops_fn)
169 blkiop->ops.blkio_update_group_write_iops_fn(blkg,iops); 173 blkiop->ops.blkio_update_group_write_iops_fn(blkg->key,
174 blkg,iops);
170 } 175 }
171} 176}
172 177
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 2070053a30b1..034c35562dba 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -186,16 +186,17 @@ extern unsigned int blkcg_get_write_iops(struct blkio_cgroup *blkcg,
186 dev_t dev); 186 dev_t dev);
187 187
188typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg); 188typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg);
189typedef void (blkio_update_group_weight_fn) (struct blkio_group *blkg, 189
190 unsigned int weight); 190typedef void (blkio_update_group_weight_fn) (void *key,
191typedef void (blkio_update_group_read_bps_fn) (struct blkio_group *blkg, 191 struct blkio_group *blkg, unsigned int weight);
192 u64 read_bps); 192typedef void (blkio_update_group_read_bps_fn) (void * key,
193typedef void (blkio_update_group_write_bps_fn) (struct blkio_group *blkg, 193 struct blkio_group *blkg, u64 read_bps);
194 u64 write_bps); 194typedef void (blkio_update_group_write_bps_fn) (void *key,
195typedef void (blkio_update_group_read_iops_fn) (struct blkio_group *blkg, 195 struct blkio_group *blkg, u64 write_bps);
196 unsigned int read_iops); 196typedef void (blkio_update_group_read_iops_fn) (void *key,
197typedef void (blkio_update_group_write_iops_fn) (struct blkio_group *blkg, 197 struct blkio_group *blkg, unsigned int read_iops);
198 unsigned int write_iops); 198typedef void (blkio_update_group_write_iops_fn) (void *key,
199 struct blkio_group *blkg, unsigned int write_iops);
199 200
200struct blkio_policy_ops { 201struct blkio_policy_ops {
201 blkio_unlink_group_fn *blkio_unlink_group_fn; 202 blkio_unlink_group_fn *blkio_unlink_group_fn;
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index bc2936b80add..11713ed852f4 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -70,6 +70,9 @@ struct throtl_grp {
70 /* When did we start a new slice */ 70 /* When did we start a new slice */
71 unsigned long slice_start[2]; 71 unsigned long slice_start[2];
72 unsigned long slice_end[2]; 72 unsigned long slice_end[2];
73
74 /* Some throttle limits got updated for the group */
75 bool limits_changed;
73}; 76};
74 77
75struct throtl_data 78struct throtl_data
@@ -93,6 +96,8 @@ struct throtl_data
93 96
94 /* Work for dispatching throttled bios */ 97 /* Work for dispatching throttled bios */
95 struct delayed_work throtl_work; 98 struct delayed_work throtl_work;
99
100 atomic_t limits_changed;
96}; 101};
97 102
98enum tg_state_flags { 103enum tg_state_flags {
@@ -592,15 +597,6 @@ static void tg_update_disptime(struct throtl_data *td, struct throtl_grp *tg)
592 min_wait = min(read_wait, write_wait); 597 min_wait = min(read_wait, write_wait);
593 disptime = jiffies + min_wait; 598 disptime = jiffies + min_wait;
594 599
595 /*
596 * If group is already on active tree, then update dispatch time
597 * only if it is lesser than existing dispatch time. Otherwise
598 * always update the dispatch time
599 */
600
601 if (throtl_tg_on_rr(tg) && time_before(disptime, tg->disptime))
602 return;
603
604 /* Update dispatch time */ 600 /* Update dispatch time */
605 throtl_dequeue_tg(td, tg); 601 throtl_dequeue_tg(td, tg);
606 tg->disptime = disptime; 602 tg->disptime = disptime;
@@ -691,6 +687,46 @@ static int throtl_select_dispatch(struct throtl_data *td, struct bio_list *bl)
691 return nr_disp; 687 return nr_disp;
692} 688}
693 689
690static void throtl_process_limit_change(struct throtl_data *td)
691{
692 struct throtl_grp *tg;
693 struct hlist_node *pos, *n;
694
695 /*
696 * Make sure atomic_inc() effects from
697 * throtl_update_blkio_group_read_bps(), group of functions are
698 * visible.
699 * Is this required or smp_mb__after_atomic_inc() was suffcient
700 * after the atomic_inc().
701 */
702 smp_rmb();
703 if (!atomic_read(&td->limits_changed))
704 return;
705
706 throtl_log(td, "limit changed =%d", atomic_read(&td->limits_changed));
707
708 hlist_for_each_entry_safe(tg, pos, n, &td->tg_list, tg_node) {
709 /*
710 * Do I need an smp_rmb() here to make sure tg->limits_changed
711 * update is visible. I am relying on smp_rmb() at the
712 * beginning of function and not putting a new one here.
713 */
714
715 if (throtl_tg_on_rr(tg) && tg->limits_changed) {
716 throtl_log_tg(td, tg, "limit change rbps=%llu wbps=%llu"
717 " riops=%u wiops=%u", tg->bps[READ],
718 tg->bps[WRITE], tg->iops[READ],
719 tg->iops[WRITE]);
720 tg_update_disptime(td, tg);
721 tg->limits_changed = false;
722 }
723 }
724
725 smp_mb__before_atomic_dec();
726 atomic_dec(&td->limits_changed);
727 smp_mb__after_atomic_dec();
728}
729
694/* Dispatch throttled bios. Should be called without queue lock held. */ 730/* Dispatch throttled bios. Should be called without queue lock held. */
695static int throtl_dispatch(struct request_queue *q) 731static int throtl_dispatch(struct request_queue *q)
696{ 732{
@@ -701,6 +737,8 @@ static int throtl_dispatch(struct request_queue *q)
701 737
702 spin_lock_irq(q->queue_lock); 738 spin_lock_irq(q->queue_lock);
703 739
740 throtl_process_limit_change(td);
741
704 if (!total_nr_queued(td)) 742 if (!total_nr_queued(td))
705 goto out; 743 goto out;
706 744
@@ -821,28 +859,74 @@ void throtl_unlink_blkio_group(void *key, struct blkio_group *blkg)
821 spin_unlock_irqrestore(td->queue->queue_lock, flags); 859 spin_unlock_irqrestore(td->queue->queue_lock, flags);
822} 860}
823 861
824static void throtl_update_blkio_group_read_bps (struct blkio_group *blkg, 862/*
825 u64 read_bps) 863 * For all update functions, key should be a valid pointer because these
864 * update functions are called under blkcg_lock, that means, blkg is
865 * valid and in turn key is valid. queue exit path can not race becuase
866 * of blkcg_lock
867 *
868 * Can not take queue lock in update functions as queue lock under blkcg_lock
869 * is not allowed. Under other paths we take blkcg_lock under queue_lock.
870 */
871static void throtl_update_blkio_group_read_bps(void *key,
872 struct blkio_group *blkg, u64 read_bps)
826{ 873{
874 struct throtl_data *td = key;
875
827 tg_of_blkg(blkg)->bps[READ] = read_bps; 876 tg_of_blkg(blkg)->bps[READ] = read_bps;
877 /* Make sure read_bps is updated before setting limits_changed */
878 smp_wmb();
879 tg_of_blkg(blkg)->limits_changed = true;
880
881 /* Make sure tg->limits_changed is updated before td->limits_changed */
882 smp_mb__before_atomic_inc();
883 atomic_inc(&td->limits_changed);
884 smp_mb__after_atomic_inc();
885
886 /* Schedule a work now to process the limit change */
887 throtl_schedule_delayed_work(td->queue, 0);
828} 888}
829 889
830static void throtl_update_blkio_group_write_bps (struct blkio_group *blkg, 890static void throtl_update_blkio_group_write_bps(void *key,
831 u64 write_bps) 891 struct blkio_group *blkg, u64 write_bps)
832{ 892{
893 struct throtl_data *td = key;
894
833 tg_of_blkg(blkg)->bps[WRITE] = write_bps; 895 tg_of_blkg(blkg)->bps[WRITE] = write_bps;
896 smp_wmb();
897 tg_of_blkg(blkg)->limits_changed = true;
898 smp_mb__before_atomic_inc();
899 atomic_inc(&td->limits_changed);
900 smp_mb__after_atomic_inc();
901 throtl_schedule_delayed_work(td->queue, 0);
834} 902}
835 903
836static void throtl_update_blkio_group_read_iops (struct blkio_group *blkg, 904static void throtl_update_blkio_group_read_iops(void *key,
837 unsigned int read_iops) 905 struct blkio_group *blkg, unsigned int read_iops)
838{ 906{
907 struct throtl_data *td = key;
908
839 tg_of_blkg(blkg)->iops[READ] = read_iops; 909 tg_of_blkg(blkg)->iops[READ] = read_iops;
910 smp_wmb();
911 tg_of_blkg(blkg)->limits_changed = true;
912 smp_mb__before_atomic_inc();
913 atomic_inc(&td->limits_changed);
914 smp_mb__after_atomic_inc();
915 throtl_schedule_delayed_work(td->queue, 0);
840} 916}
841 917
842static void throtl_update_blkio_group_write_iops (struct blkio_group *blkg, 918static void throtl_update_blkio_group_write_iops(void *key,
843 unsigned int write_iops) 919 struct blkio_group *blkg, unsigned int write_iops)
844{ 920{
921 struct throtl_data *td = key;
922
845 tg_of_blkg(blkg)->iops[WRITE] = write_iops; 923 tg_of_blkg(blkg)->iops[WRITE] = write_iops;
924 smp_wmb();
925 tg_of_blkg(blkg)->limits_changed = true;
926 smp_mb__before_atomic_inc();
927 atomic_inc(&td->limits_changed);
928 smp_mb__after_atomic_inc();
929 throtl_schedule_delayed_work(td->queue, 0);
846} 930}
847 931
848void throtl_shutdown_timer_wq(struct request_queue *q) 932void throtl_shutdown_timer_wq(struct request_queue *q)
@@ -886,8 +970,14 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop)
886 /* 970 /*
887 * There is already another bio queued in same dir. No 971 * There is already another bio queued in same dir. No
888 * need to update dispatch time. 972 * need to update dispatch time.
973 * Still update the disptime if rate limits on this group
974 * were changed.
889 */ 975 */
890 update_disptime = false; 976 if (!tg->limits_changed)
977 update_disptime = false;
978 else
979 tg->limits_changed = false;
980
891 goto queue_bio; 981 goto queue_bio;
892 } 982 }
893 983
@@ -929,6 +1019,7 @@ int blk_throtl_init(struct request_queue *q)
929 1019
930 INIT_HLIST_HEAD(&td->tg_list); 1020 INIT_HLIST_HEAD(&td->tg_list);
931 td->tg_service_tree = THROTL_RB_ROOT; 1021 td->tg_service_tree = THROTL_RB_ROOT;
1022 atomic_set(&td->limits_changed, 0);
932 1023
933 /* Init root group */ 1024 /* Init root group */
934 tg = &td->root_tg; 1025 tg = &td->root_tg;
@@ -996,6 +1087,13 @@ void blk_throtl_exit(struct request_queue *q)
996 */ 1087 */
997 if (wait) 1088 if (wait)
998 synchronize_rcu(); 1089 synchronize_rcu();
1090
1091 /*
1092 * Just being safe to make sure after previous flush if some body did
1093 * update limits through cgroup and another work got queued, cancel
1094 * it.
1095 */
1096 throtl_shutdown_timer_wq(q);
999 throtl_td_free(td); 1097 throtl_td_free(td);
1000} 1098}
1001 1099
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 684592621736..86338d5d4d09 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -951,8 +951,8 @@ static inline struct cfq_group *cfqg_of_blkg(struct blkio_group *blkg)
951 return NULL; 951 return NULL;
952} 952}
953 953
954void 954void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg,
955cfq_update_blkio_group_weight(struct blkio_group *blkg, unsigned int weight) 955 unsigned int weight)
956{ 956{
957 cfqg_of_blkg(blkg)->weight = weight; 957 cfqg_of_blkg(blkg)->weight = weight;
958} 958}