diff options
author | Vivek Goyal <vgoyal@redhat.com> | 2010-10-01 08:49:49 -0400 |
---|---|---|
committer | Jens Axboe <jaxboe@fusionio.com> | 2010-10-01 08:49:49 -0400 |
commit | fe0714377ee2ca161bf2afb7773e22f15f1786d4 (patch) | |
tree | 09f5e8686d741d012333c92251b8cc66793ef916 /block | |
parent | 02977e4af7ed3b478c505e50491ffdf3e1314cf4 (diff) |
blkio: Recalculate the throttled bio dispatch time upon throttle limit change
o Currently any cgroup throttle limit changes are processed asynchronousy and
the change does not take affect till a new bio is dispatched from same group.
o It might happen that a user sets a redicuously low limit on throttling.
Say 1 bytes per second on reads. In such cases simple operations like mount
a disk can wait for a very long time.
o Once bio is throttled, there is no easy way to come out of that wait even if
user increases the read limit later.
o This patch fixes it. Now if a user changes the cgroup limits, we recalculate
the bio dispatch time according to new limits.
o Can't take queueu lock under blkcg_lock, hence after the change I wake
up the dispatch thread again which recalculates the time. So there are some
variables being synchronized across two threads without lock and I had to
make use of barriers. Hoping I have used barriers correctly. Any review of
memory barrier code especially will help.
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
Diffstat (limited to 'block')
-rw-r--r-- | block/blk-cgroup.c | 15 | ||||
-rw-r--r-- | block/blk-cgroup.h | 21 | ||||
-rw-r--r-- | block/blk-throttle.c | 134 | ||||
-rw-r--r-- | block/cfq-iosched.c | 4 |
4 files changed, 139 insertions, 35 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index b06ca70354e3..52c12130a5de 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c | |||
@@ -124,7 +124,8 @@ blkio_update_group_weight(struct blkio_group *blkg, unsigned int weight) | |||
124 | if (blkiop->plid != blkg->plid) | 124 | if (blkiop->plid != blkg->plid) |
125 | continue; | 125 | continue; |
126 | if (blkiop->ops.blkio_update_group_weight_fn) | 126 | if (blkiop->ops.blkio_update_group_weight_fn) |
127 | blkiop->ops.blkio_update_group_weight_fn(blkg, weight); | 127 | blkiop->ops.blkio_update_group_weight_fn(blkg->key, |
128 | blkg, weight); | ||
128 | } | 129 | } |
129 | } | 130 | } |
130 | 131 | ||
@@ -141,11 +142,13 @@ static inline void blkio_update_group_bps(struct blkio_group *blkg, u64 bps, | |||
141 | 142 | ||
142 | if (fileid == BLKIO_THROTL_read_bps_device | 143 | if (fileid == BLKIO_THROTL_read_bps_device |
143 | && blkiop->ops.blkio_update_group_read_bps_fn) | 144 | && blkiop->ops.blkio_update_group_read_bps_fn) |
144 | blkiop->ops.blkio_update_group_read_bps_fn(blkg, bps); | 145 | blkiop->ops.blkio_update_group_read_bps_fn(blkg->key, |
146 | blkg, bps); | ||
145 | 147 | ||
146 | if (fileid == BLKIO_THROTL_write_bps_device | 148 | if (fileid == BLKIO_THROTL_write_bps_device |
147 | && blkiop->ops.blkio_update_group_write_bps_fn) | 149 | && blkiop->ops.blkio_update_group_write_bps_fn) |
148 | blkiop->ops.blkio_update_group_write_bps_fn(blkg, bps); | 150 | blkiop->ops.blkio_update_group_write_bps_fn(blkg->key, |
151 | blkg, bps); | ||
149 | } | 152 | } |
150 | } | 153 | } |
151 | 154 | ||
@@ -162,11 +165,13 @@ static inline void blkio_update_group_iops(struct blkio_group *blkg, | |||
162 | 165 | ||
163 | if (fileid == BLKIO_THROTL_read_iops_device | 166 | if (fileid == BLKIO_THROTL_read_iops_device |
164 | && blkiop->ops.blkio_update_group_read_iops_fn) | 167 | && blkiop->ops.blkio_update_group_read_iops_fn) |
165 | blkiop->ops.blkio_update_group_read_iops_fn(blkg, iops); | 168 | blkiop->ops.blkio_update_group_read_iops_fn(blkg->key, |
169 | blkg, iops); | ||
166 | 170 | ||
167 | if (fileid == BLKIO_THROTL_write_iops_device | 171 | if (fileid == BLKIO_THROTL_write_iops_device |
168 | && blkiop->ops.blkio_update_group_write_iops_fn) | 172 | && blkiop->ops.blkio_update_group_write_iops_fn) |
169 | blkiop->ops.blkio_update_group_write_iops_fn(blkg,iops); | 173 | blkiop->ops.blkio_update_group_write_iops_fn(blkg->key, |
174 | blkg,iops); | ||
170 | } | 175 | } |
171 | } | 176 | } |
172 | 177 | ||
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 2070053a30b1..034c35562dba 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h | |||
@@ -186,16 +186,17 @@ extern unsigned int blkcg_get_write_iops(struct blkio_cgroup *blkcg, | |||
186 | dev_t dev); | 186 | dev_t dev); |
187 | 187 | ||
188 | typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg); | 188 | typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg); |
189 | typedef void (blkio_update_group_weight_fn) (struct blkio_group *blkg, | 189 | |
190 | unsigned int weight); | 190 | typedef void (blkio_update_group_weight_fn) (void *key, |
191 | typedef void (blkio_update_group_read_bps_fn) (struct blkio_group *blkg, | 191 | struct blkio_group *blkg, unsigned int weight); |
192 | u64 read_bps); | 192 | typedef void (blkio_update_group_read_bps_fn) (void * key, |
193 | typedef void (blkio_update_group_write_bps_fn) (struct blkio_group *blkg, | 193 | struct blkio_group *blkg, u64 read_bps); |
194 | u64 write_bps); | 194 | typedef void (blkio_update_group_write_bps_fn) (void *key, |
195 | typedef void (blkio_update_group_read_iops_fn) (struct blkio_group *blkg, | 195 | struct blkio_group *blkg, u64 write_bps); |
196 | unsigned int read_iops); | 196 | typedef void (blkio_update_group_read_iops_fn) (void *key, |
197 | typedef void (blkio_update_group_write_iops_fn) (struct blkio_group *blkg, | 197 | struct blkio_group *blkg, unsigned int read_iops); |
198 | unsigned int write_iops); | 198 | typedef void (blkio_update_group_write_iops_fn) (void *key, |
199 | struct blkio_group *blkg, unsigned int write_iops); | ||
199 | 200 | ||
200 | struct blkio_policy_ops { | 201 | struct blkio_policy_ops { |
201 | blkio_unlink_group_fn *blkio_unlink_group_fn; | 202 | blkio_unlink_group_fn *blkio_unlink_group_fn; |
diff --git a/block/blk-throttle.c b/block/blk-throttle.c index bc2936b80add..11713ed852f4 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c | |||
@@ -70,6 +70,9 @@ struct throtl_grp { | |||
70 | /* When did we start a new slice */ | 70 | /* When did we start a new slice */ |
71 | unsigned long slice_start[2]; | 71 | unsigned long slice_start[2]; |
72 | unsigned long slice_end[2]; | 72 | unsigned long slice_end[2]; |
73 | |||
74 | /* Some throttle limits got updated for the group */ | ||
75 | bool limits_changed; | ||
73 | }; | 76 | }; |
74 | 77 | ||
75 | struct throtl_data | 78 | struct throtl_data |
@@ -93,6 +96,8 @@ struct throtl_data | |||
93 | 96 | ||
94 | /* Work for dispatching throttled bios */ | 97 | /* Work for dispatching throttled bios */ |
95 | struct delayed_work throtl_work; | 98 | struct delayed_work throtl_work; |
99 | |||
100 | atomic_t limits_changed; | ||
96 | }; | 101 | }; |
97 | 102 | ||
98 | enum tg_state_flags { | 103 | enum tg_state_flags { |
@@ -592,15 +597,6 @@ static void tg_update_disptime(struct throtl_data *td, struct throtl_grp *tg) | |||
592 | min_wait = min(read_wait, write_wait); | 597 | min_wait = min(read_wait, write_wait); |
593 | disptime = jiffies + min_wait; | 598 | disptime = jiffies + min_wait; |
594 | 599 | ||
595 | /* | ||
596 | * If group is already on active tree, then update dispatch time | ||
597 | * only if it is lesser than existing dispatch time. Otherwise | ||
598 | * always update the dispatch time | ||
599 | */ | ||
600 | |||
601 | if (throtl_tg_on_rr(tg) && time_before(disptime, tg->disptime)) | ||
602 | return; | ||
603 | |||
604 | /* Update dispatch time */ | 600 | /* Update dispatch time */ |
605 | throtl_dequeue_tg(td, tg); | 601 | throtl_dequeue_tg(td, tg); |
606 | tg->disptime = disptime; | 602 | tg->disptime = disptime; |
@@ -691,6 +687,46 @@ static int throtl_select_dispatch(struct throtl_data *td, struct bio_list *bl) | |||
691 | return nr_disp; | 687 | return nr_disp; |
692 | } | 688 | } |
693 | 689 | ||
690 | static void throtl_process_limit_change(struct throtl_data *td) | ||
691 | { | ||
692 | struct throtl_grp *tg; | ||
693 | struct hlist_node *pos, *n; | ||
694 | |||
695 | /* | ||
696 | * Make sure atomic_inc() effects from | ||
697 | * throtl_update_blkio_group_read_bps(), group of functions are | ||
698 | * visible. | ||
699 | * Is this required or smp_mb__after_atomic_inc() was suffcient | ||
700 | * after the atomic_inc(). | ||
701 | */ | ||
702 | smp_rmb(); | ||
703 | if (!atomic_read(&td->limits_changed)) | ||
704 | return; | ||
705 | |||
706 | throtl_log(td, "limit changed =%d", atomic_read(&td->limits_changed)); | ||
707 | |||
708 | hlist_for_each_entry_safe(tg, pos, n, &td->tg_list, tg_node) { | ||
709 | /* | ||
710 | * Do I need an smp_rmb() here to make sure tg->limits_changed | ||
711 | * update is visible. I am relying on smp_rmb() at the | ||
712 | * beginning of function and not putting a new one here. | ||
713 | */ | ||
714 | |||
715 | if (throtl_tg_on_rr(tg) && tg->limits_changed) { | ||
716 | throtl_log_tg(td, tg, "limit change rbps=%llu wbps=%llu" | ||
717 | " riops=%u wiops=%u", tg->bps[READ], | ||
718 | tg->bps[WRITE], tg->iops[READ], | ||
719 | tg->iops[WRITE]); | ||
720 | tg_update_disptime(td, tg); | ||
721 | tg->limits_changed = false; | ||
722 | } | ||
723 | } | ||
724 | |||
725 | smp_mb__before_atomic_dec(); | ||
726 | atomic_dec(&td->limits_changed); | ||
727 | smp_mb__after_atomic_dec(); | ||
728 | } | ||
729 | |||
694 | /* Dispatch throttled bios. Should be called without queue lock held. */ | 730 | /* Dispatch throttled bios. Should be called without queue lock held. */ |
695 | static int throtl_dispatch(struct request_queue *q) | 731 | static int throtl_dispatch(struct request_queue *q) |
696 | { | 732 | { |
@@ -701,6 +737,8 @@ static int throtl_dispatch(struct request_queue *q) | |||
701 | 737 | ||
702 | spin_lock_irq(q->queue_lock); | 738 | spin_lock_irq(q->queue_lock); |
703 | 739 | ||
740 | throtl_process_limit_change(td); | ||
741 | |||
704 | if (!total_nr_queued(td)) | 742 | if (!total_nr_queued(td)) |
705 | goto out; | 743 | goto out; |
706 | 744 | ||
@@ -821,28 +859,74 @@ void throtl_unlink_blkio_group(void *key, struct blkio_group *blkg) | |||
821 | spin_unlock_irqrestore(td->queue->queue_lock, flags); | 859 | spin_unlock_irqrestore(td->queue->queue_lock, flags); |
822 | } | 860 | } |
823 | 861 | ||
824 | static void throtl_update_blkio_group_read_bps (struct blkio_group *blkg, | 862 | /* |
825 | u64 read_bps) | 863 | * For all update functions, key should be a valid pointer because these |
864 | * update functions are called under blkcg_lock, that means, blkg is | ||
865 | * valid and in turn key is valid. queue exit path can not race becuase | ||
866 | * of blkcg_lock | ||
867 | * | ||
868 | * Can not take queue lock in update functions as queue lock under blkcg_lock | ||
869 | * is not allowed. Under other paths we take blkcg_lock under queue_lock. | ||
870 | */ | ||
871 | static void throtl_update_blkio_group_read_bps(void *key, | ||
872 | struct blkio_group *blkg, u64 read_bps) | ||
826 | { | 873 | { |
874 | struct throtl_data *td = key; | ||
875 | |||
827 | tg_of_blkg(blkg)->bps[READ] = read_bps; | 876 | tg_of_blkg(blkg)->bps[READ] = read_bps; |
877 | /* Make sure read_bps is updated before setting limits_changed */ | ||
878 | smp_wmb(); | ||
879 | tg_of_blkg(blkg)->limits_changed = true; | ||
880 | |||
881 | /* Make sure tg->limits_changed is updated before td->limits_changed */ | ||
882 | smp_mb__before_atomic_inc(); | ||
883 | atomic_inc(&td->limits_changed); | ||
884 | smp_mb__after_atomic_inc(); | ||
885 | |||
886 | /* Schedule a work now to process the limit change */ | ||
887 | throtl_schedule_delayed_work(td->queue, 0); | ||
828 | } | 888 | } |
829 | 889 | ||
830 | static void throtl_update_blkio_group_write_bps (struct blkio_group *blkg, | 890 | static void throtl_update_blkio_group_write_bps(void *key, |
831 | u64 write_bps) | 891 | struct blkio_group *blkg, u64 write_bps) |
832 | { | 892 | { |
893 | struct throtl_data *td = key; | ||
894 | |||
833 | tg_of_blkg(blkg)->bps[WRITE] = write_bps; | 895 | tg_of_blkg(blkg)->bps[WRITE] = write_bps; |
896 | smp_wmb(); | ||
897 | tg_of_blkg(blkg)->limits_changed = true; | ||
898 | smp_mb__before_atomic_inc(); | ||
899 | atomic_inc(&td->limits_changed); | ||
900 | smp_mb__after_atomic_inc(); | ||
901 | throtl_schedule_delayed_work(td->queue, 0); | ||
834 | } | 902 | } |
835 | 903 | ||
836 | static void throtl_update_blkio_group_read_iops (struct blkio_group *blkg, | 904 | static void throtl_update_blkio_group_read_iops(void *key, |
837 | unsigned int read_iops) | 905 | struct blkio_group *blkg, unsigned int read_iops) |
838 | { | 906 | { |
907 | struct throtl_data *td = key; | ||
908 | |||
839 | tg_of_blkg(blkg)->iops[READ] = read_iops; | 909 | tg_of_blkg(blkg)->iops[READ] = read_iops; |
910 | smp_wmb(); | ||
911 | tg_of_blkg(blkg)->limits_changed = true; | ||
912 | smp_mb__before_atomic_inc(); | ||
913 | atomic_inc(&td->limits_changed); | ||
914 | smp_mb__after_atomic_inc(); | ||
915 | throtl_schedule_delayed_work(td->queue, 0); | ||
840 | } | 916 | } |
841 | 917 | ||
842 | static void throtl_update_blkio_group_write_iops (struct blkio_group *blkg, | 918 | static void throtl_update_blkio_group_write_iops(void *key, |
843 | unsigned int write_iops) | 919 | struct blkio_group *blkg, unsigned int write_iops) |
844 | { | 920 | { |
921 | struct throtl_data *td = key; | ||
922 | |||
845 | tg_of_blkg(blkg)->iops[WRITE] = write_iops; | 923 | tg_of_blkg(blkg)->iops[WRITE] = write_iops; |
924 | smp_wmb(); | ||
925 | tg_of_blkg(blkg)->limits_changed = true; | ||
926 | smp_mb__before_atomic_inc(); | ||
927 | atomic_inc(&td->limits_changed); | ||
928 | smp_mb__after_atomic_inc(); | ||
929 | throtl_schedule_delayed_work(td->queue, 0); | ||
846 | } | 930 | } |
847 | 931 | ||
848 | void throtl_shutdown_timer_wq(struct request_queue *q) | 932 | void throtl_shutdown_timer_wq(struct request_queue *q) |
@@ -886,8 +970,14 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop) | |||
886 | /* | 970 | /* |
887 | * There is already another bio queued in same dir. No | 971 | * There is already another bio queued in same dir. No |
888 | * need to update dispatch time. | 972 | * need to update dispatch time. |
973 | * Still update the disptime if rate limits on this group | ||
974 | * were changed. | ||
889 | */ | 975 | */ |
890 | update_disptime = false; | 976 | if (!tg->limits_changed) |
977 | update_disptime = false; | ||
978 | else | ||
979 | tg->limits_changed = false; | ||
980 | |||
891 | goto queue_bio; | 981 | goto queue_bio; |
892 | } | 982 | } |
893 | 983 | ||
@@ -929,6 +1019,7 @@ int blk_throtl_init(struct request_queue *q) | |||
929 | 1019 | ||
930 | INIT_HLIST_HEAD(&td->tg_list); | 1020 | INIT_HLIST_HEAD(&td->tg_list); |
931 | td->tg_service_tree = THROTL_RB_ROOT; | 1021 | td->tg_service_tree = THROTL_RB_ROOT; |
1022 | atomic_set(&td->limits_changed, 0); | ||
932 | 1023 | ||
933 | /* Init root group */ | 1024 | /* Init root group */ |
934 | tg = &td->root_tg; | 1025 | tg = &td->root_tg; |
@@ -996,6 +1087,13 @@ void blk_throtl_exit(struct request_queue *q) | |||
996 | */ | 1087 | */ |
997 | if (wait) | 1088 | if (wait) |
998 | synchronize_rcu(); | 1089 | synchronize_rcu(); |
1090 | |||
1091 | /* | ||
1092 | * Just being safe to make sure after previous flush if some body did | ||
1093 | * update limits through cgroup and another work got queued, cancel | ||
1094 | * it. | ||
1095 | */ | ||
1096 | throtl_shutdown_timer_wq(q); | ||
999 | throtl_td_free(td); | 1097 | throtl_td_free(td); |
1000 | } | 1098 | } |
1001 | 1099 | ||
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 684592621736..86338d5d4d09 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c | |||
@@ -951,8 +951,8 @@ static inline struct cfq_group *cfqg_of_blkg(struct blkio_group *blkg) | |||
951 | return NULL; | 951 | return NULL; |
952 | } | 952 | } |
953 | 953 | ||
954 | void | 954 | void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg, |
955 | cfq_update_blkio_group_weight(struct blkio_group *blkg, unsigned int weight) | 955 | unsigned int weight) |
956 | { | 956 | { |
957 | cfqg_of_blkg(blkg)->weight = weight; | 957 | cfqg_of_blkg(blkg)->weight = weight; |
958 | } | 958 | } |