author     Vivek Goyal <vgoyal@redhat.com>      2011-03-07 15:09:32 -0500
committer  Jens Axboe <jaxboe@fusionio.com>     2011-03-07 15:09:32 -0500
commit     de701c74a34005e637e1ca2634fbf28fd1debba2
tree       45a4af16e2a76cbf5866cfb3f284956988918a13
parent     231d704b4ab7491473c0b1a9cd0c6e0d1cba85b9
blk-throttle: Some cleanups and race fixes in limit update code
When throttle group limits are updated through cgroups, a thread is woken up
to process these updates. While reviewing that code, Oleg noted a couple of
race conditions in it and also suggested that the code could be simplified.
This patch fixes the races and simplifies the code based on Oleg's
suggestions:

- Use xchg().
- Introduce a common function, throtl_update_blkio_group_common(), which is
  now shared by all the iops/bps update functions.

Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>

Fixed a merge issue: throtl_schedule_delayed_work() now takes throtl_data as
the argument, not the queue.

Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
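For readers skimming the diff below, the core of the change is replacing the
open-coded flag handling (plain stores plus explicit memory barriers and an
atomic counter) with an atomic test-and-clear via xchg(). The following is a
minimal standalone sketch of that pattern, not the kernel code: C11
atomic_exchange() from <stdatomic.h> stands in for the kernel's xchg(), and
the names update_limit() and process_limit_change() are invented for this
illustration.

/*
 * Sketch only -- not the kernel code. C11 atomic_exchange() stands in for
 * the kernel's xchg(); the function names are invented for illustration.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool limits_changed;
static unsigned long long read_bps;

/* Updater side: publish the new value, then raise the "changed" flag. */
static void update_limit(unsigned long long new_bps)
{
	read_bps = new_bps;
	atomic_exchange(&limits_changed, true);
}

/*
 * Worker side: test and clear the flag in one atomic step. With a plain
 * "if (flag) { ...; flag = false; }", a concurrent updater could set the
 * flag between the test and the clear and that update would be lost.
 * atomic_exchange() returns the old value while storing the new one, so
 * a pending change is either consumed here or left set for the next pass.
 */
static void process_limit_change(void)
{
	if (!atomic_exchange(&limits_changed, false))
		return;
	printf("applying new limit: %llu bytes/sec\n", read_bps);
}

int main(void)
{
	update_limit(1048576ULL);
	process_limit_change();	/* sees and clears the pending change */
	process_limit_change();	/* nothing pending, returns early */
	return 0;
}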
-rw-r--r--  block/blk-throttle.c  |  96
1 file changed, 40 insertions(+), 56 deletions(-)
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index a29f09240c0..32dd3e4b041 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -102,7 +102,7 @@ struct throtl_data
 	/* Work for dispatching throttled bios */
 	struct delayed_work throtl_work;
 
-	atomic_t limits_changed;
+	bool limits_changed;
 };
 
 enum tg_state_flags {
@@ -201,6 +201,7 @@ static struct throtl_grp * throtl_find_alloc_tg(struct throtl_data *td,
 	RB_CLEAR_NODE(&tg->rb_node);
 	bio_list_init(&tg->bio_lists[0]);
 	bio_list_init(&tg->bio_lists[1]);
+	td->limits_changed = false;
 
 	/*
 	 * Take the initial reference that will be released on destroy
@@ -737,34 +738,27 @@ static void throtl_process_limit_change(struct throtl_data *td)
 	struct throtl_grp *tg;
 	struct hlist_node *pos, *n;
 
-	if (!atomic_read(&td->limits_changed))
+	if (!td->limits_changed)
 		return;
 
-	throtl_log(td, "limit changed =%d", atomic_read(&td->limits_changed));
+	xchg(&td->limits_changed, false);
 
-	/*
-	 * Make sure updates from throtl_update_blkio_group_read_bps() group
-	 * of functions to tg->limits_changed are visible. We do not
-	 * want update td->limits_changed to be visible but update to
-	 * tg->limits_changed not being visible yet on this cpu. Hence
-	 * the read barrier.
-	 */
-	smp_rmb();
+	throtl_log(td, "limits changed");
 
 	hlist_for_each_entry_safe(tg, pos, n, &td->tg_list, tg_node) {
-		if (throtl_tg_on_rr(tg) && tg->limits_changed) {
-			throtl_log_tg(td, tg, "limit change rbps=%llu wbps=%llu"
-				" riops=%u wiops=%u", tg->bps[READ],
-				tg->bps[WRITE], tg->iops[READ],
-				tg->iops[WRITE]);
+		if (!tg->limits_changed)
+			continue;
+
+		if (!xchg(&tg->limits_changed, false))
+			continue;
+
+		throtl_log_tg(td, tg, "limit change rbps=%llu wbps=%llu"
+			" riops=%u wiops=%u", tg->bps[READ], tg->bps[WRITE],
+			tg->iops[READ], tg->iops[WRITE]);
+
+		if (throtl_tg_on_rr(tg))
 			tg_update_disptime(td, tg);
-			tg->limits_changed = false;
-		}
 	}
-
-	smp_mb__before_atomic_dec();
-	atomic_dec(&td->limits_changed);
-	smp_mb__after_atomic_dec();
 }
 
 /* Dispatch throttled bios. Should be called without queue lock held. */
@@ -898,6 +892,15 @@ void throtl_unlink_blkio_group(void *key, struct blkio_group *blkg)
 	spin_unlock_irqrestore(td->queue->queue_lock, flags);
 }
 
+static void throtl_update_blkio_group_common(struct throtl_data *td,
+				struct throtl_grp *tg)
+{
+	xchg(&tg->limits_changed, true);
+	xchg(&td->limits_changed, true);
+	/* Schedule a work now to process the limit change */
+	throtl_schedule_delayed_work(td, 0);
+}
+
 /*
  * For all update functions, key should be a valid pointer because these
  * update functions are called under blkcg_lock, that means, blkg is
@@ -911,61 +914,40 @@ static void throtl_update_blkio_group_read_bps(void *key,
 				struct blkio_group *blkg, u64 read_bps)
 {
 	struct throtl_data *td = key;
+	struct throtl_grp *tg = tg_of_blkg(blkg);
 
-	tg_of_blkg(blkg)->bps[READ] = read_bps;
-	/* Make sure read_bps is updated before setting limits_changed */
-	smp_wmb();
-	tg_of_blkg(blkg)->limits_changed = true;
-
-	/* Make sure tg->limits_changed is updated before td->limits_changed */
-	smp_mb__before_atomic_inc();
-	atomic_inc(&td->limits_changed);
-	smp_mb__after_atomic_inc();
-
-	/* Schedule a work now to process the limit change */
-	throtl_schedule_delayed_work(td, 0);
+	tg->bps[READ] = read_bps;
+	throtl_update_blkio_group_common(td, tg);
 }
 
 static void throtl_update_blkio_group_write_bps(void *key,
 		struct blkio_group *blkg, u64 write_bps)
 {
 	struct throtl_data *td = key;
+	struct throtl_grp *tg = tg_of_blkg(blkg);
 
-	tg_of_blkg(blkg)->bps[WRITE] = write_bps;
-	smp_wmb();
-	tg_of_blkg(blkg)->limits_changed = true;
-	smp_mb__before_atomic_inc();
-	atomic_inc(&td->limits_changed);
-	smp_mb__after_atomic_inc();
-	throtl_schedule_delayed_work(td, 0);
+	tg->bps[WRITE] = write_bps;
+	throtl_update_blkio_group_common(td, tg);
 }
 
 static void throtl_update_blkio_group_read_iops(void *key,
 		struct blkio_group *blkg, unsigned int read_iops)
 {
 	struct throtl_data *td = key;
+	struct throtl_grp *tg = tg_of_blkg(blkg);
 
-	tg_of_blkg(blkg)->iops[READ] = read_iops;
-	smp_wmb();
-	tg_of_blkg(blkg)->limits_changed = true;
-	smp_mb__before_atomic_inc();
-	atomic_inc(&td->limits_changed);
-	smp_mb__after_atomic_inc();
-	throtl_schedule_delayed_work(td, 0);
+	tg->iops[READ] = read_iops;
+	throtl_update_blkio_group_common(td, tg);
 }
 
 static void throtl_update_blkio_group_write_iops(void *key,
 		struct blkio_group *blkg, unsigned int write_iops)
 {
 	struct throtl_data *td = key;
+	struct throtl_grp *tg = tg_of_blkg(blkg);
 
-	tg_of_blkg(blkg)->iops[WRITE] = write_iops;
-	smp_wmb();
-	tg_of_blkg(blkg)->limits_changed = true;
-	smp_mb__before_atomic_inc();
-	atomic_inc(&td->limits_changed);
-	smp_mb__after_atomic_inc();
-	throtl_schedule_delayed_work(td, 0);
+	tg->iops[WRITE] = write_iops;
+	throtl_update_blkio_group_common(td, tg);
 }
 
 static void throtl_shutdown_wq(struct request_queue *q)
@@ -1012,6 +994,7 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop)
 		 */
 		update_disptime = false;
 		goto queue_bio;
+
 	}
 
 	/* Bio is with-in rate limit of group */
@@ -1052,7 +1035,7 @@ int blk_throtl_init(struct request_queue *q)
 
 	INIT_HLIST_HEAD(&td->tg_list);
 	td->tg_service_tree = THROTL_RB_ROOT;
-	atomic_set(&td->limits_changed, 0);
+	td->limits_changed = false;
 
 	/* Init root group */
 	tg = &td->root_tg;
@@ -1064,6 +1047,7 @@ int blk_throtl_init(struct request_queue *q)
 	/* Practically unlimited BW */
 	tg->bps[0] = tg->bps[1] = -1;
 	tg->iops[0] = tg->iops[1] = -1;
+	td->limits_changed = false;
 
 	/*
 	 * Set root group reference to 2. One reference will be dropped when