Diffstat (limited to 'block/blk-throttle.c')
-rw-r--r--  block/blk-throttle.c | 139
1 file changed, 71 insertions(+), 68 deletions(-)
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index e36cc10a346c..5352bdafbcf0 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -102,7 +102,7 @@ struct throtl_data
 	/* Work for dispatching throttled bios */
 	struct delayed_work throtl_work;
 
-	atomic_t limits_changed;
+	bool limits_changed;
 };
 
 enum tg_state_flags {
@@ -201,6 +201,7 @@ static struct throtl_grp * throtl_find_alloc_tg(struct throtl_data *td,
 	RB_CLEAR_NODE(&tg->rb_node);
 	bio_list_init(&tg->bio_lists[0]);
 	bio_list_init(&tg->bio_lists[1]);
+	td->limits_changed = false;
 
 	/*
 	 * Take the initial reference that will be released on destroy
@@ -737,34 +738,36 @@ static void throtl_process_limit_change(struct throtl_data *td)
 	struct throtl_grp *tg;
 	struct hlist_node *pos, *n;
 
-	if (!atomic_read(&td->limits_changed))
+	if (!td->limits_changed)
 		return;
 
-	throtl_log(td, "limit changed =%d", atomic_read(&td->limits_changed));
+	xchg(&td->limits_changed, false);
 
-	/*
-	 * Make sure updates from throtl_update_blkio_group_read_bps() group
-	 * of functions to tg->limits_changed are visible. We do not
-	 * want update td->limits_changed to be visible but update to
-	 * tg->limits_changed not being visible yet on this cpu. Hence
-	 * the read barrier.
-	 */
-	smp_rmb();
+	throtl_log(td, "limits changed");
 
 	hlist_for_each_entry_safe(tg, pos, n, &td->tg_list, tg_node) {
-		if (throtl_tg_on_rr(tg) && tg->limits_changed) {
-			throtl_log_tg(td, tg, "limit change rbps=%llu wbps=%llu"
-				" riops=%u wiops=%u", tg->bps[READ],
-				tg->bps[WRITE], tg->iops[READ],
-				tg->iops[WRITE]);
+		if (!tg->limits_changed)
+			continue;
+
+		if (!xchg(&tg->limits_changed, false))
+			continue;
+
+		throtl_log_tg(td, tg, "limit change rbps=%llu wbps=%llu"
+			" riops=%u wiops=%u", tg->bps[READ], tg->bps[WRITE],
+			tg->iops[READ], tg->iops[WRITE]);
+
+		/*
+		 * Restart the slices for both READ and WRITES. It
+		 * might happen that a group's limit are dropped
+		 * suddenly and we don't want to account recently
+		 * dispatched IO with new low rate
+		 */
+		throtl_start_new_slice(td, tg, 0);
+		throtl_start_new_slice(td, tg, 1);
+
+		if (throtl_tg_on_rr(tg))
 			tg_update_disptime(td, tg);
-			tg->limits_changed = false;
-		}
 	}
-
-	smp_mb__before_atomic_dec();
-	atomic_dec(&td->limits_changed);
-	smp_mb__after_atomic_dec();
 }
 
 /* Dispatch throttled bios. Should be called without queue lock held. */
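The worker above relies on a simple test-and-clear handshake: writers flip the per-group and per-device boolean flags, and the worker claims them with xchg(), so a concurrent update is never lost and no explicit memory barriers are needed. A minimal userspace sketch of the same pattern using C11 atomics (illustrative only; the names merely mirror the diff, this is not kernel code):

/* Writers set the flag; the worker claims it atomically with exchange,
 * the userspace counterpart of the kernel's xchg() used above. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool limits_changed;          /* plays td->limits_changed */

static void update_limit(void)              /* writer: cgroup limit update */
{
	atomic_exchange(&limits_changed, true);
	/* ...then schedule the worker... */
}

static void process_limit_change(void)      /* worker */
{
	if (!atomic_load(&limits_changed))
		return;
	if (!atomic_exchange(&limits_changed, false))
		return;                     /* another pass already claimed it */
	puts("limits changed, restart slices and update dispatch time");
}

int main(void)
{
	update_limit();
	process_limit_change();             /* acts exactly once */
	process_limit_change();             /* flag already cleared: no-op */
	return 0;
}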
@@ -774,6 +777,7 @@ static int throtl_dispatch(struct request_queue *q)
 	unsigned int nr_disp = 0;
 	struct bio_list bio_list_on_stack;
 	struct bio *bio;
+	struct blk_plug plug;
 
 	spin_lock_irq(q->queue_lock);
 
@@ -802,9 +806,10 @@ out:
 	 * immediate dispatch
 	 */
 	if (nr_disp) {
+		blk_start_plug(&plug);
 		while((bio = bio_list_pop(&bio_list_on_stack)))
 			generic_make_request(bio);
-		blk_unplug(q);
+		blk_finish_plug(&plug);
 	}
 	return nr_disp;
 }
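The explicit blk_unplug(q) call disappears here because the block layer switched to on-stack plugging around 2.6.39: the submitter declares a struct blk_plug, plugs before issuing a batch and unplugs when done. A rough kernel-style sketch of that caller pattern (hypothetical helper name, assumes that era's API, not part of this patch):

#include <linux/bio.h>
#include <linux/blkdev.h>

/* Drain a private bio list as one plugged batch: requests are held back
 * until blk_finish_plug(), so the queue sees the whole batch at once
 * instead of one queue run per bio. */
static void submit_bio_batch(struct bio_list *list)
{
	struct blk_plug plug;
	struct bio *bio;

	blk_start_plug(&plug);
	while ((bio = bio_list_pop(list)))
		generic_make_request(bio);
	blk_finish_plug(&plug);
}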
@@ -825,7 +830,8 @@ throtl_schedule_delayed_work(struct throtl_data *td, unsigned long delay)
 
 	struct delayed_work *dwork = &td->throtl_work;
 
-	if (total_nr_queued(td) > 0) {
+	/* schedule work if limits changed even if no bio is queued */
+	if (total_nr_queued(td) > 0 || td->limits_changed) {
 		/*
 		 * We might have a work scheduled to be executed in future.
 		 * Cancel that and schedule a new one.
@@ -898,6 +904,15 @@ void throtl_unlink_blkio_group(void *key, struct blkio_group *blkg)
 	spin_unlock_irqrestore(td->queue->queue_lock, flags);
 }
 
+static void throtl_update_blkio_group_common(struct throtl_data *td,
+				struct throtl_grp *tg)
+{
+	xchg(&tg->limits_changed, true);
+	xchg(&td->limits_changed, true);
+	/* Schedule a work now to process the limit change */
+	throtl_schedule_delayed_work(td, 0);
+}
+
 /*
  * For all update functions, key should be a valid pointer because these
  * update functions are called under blkcg_lock, that means, blkg is
@@ -911,64 +926,43 @@ static void throtl_update_blkio_group_read_bps(void *key,
 		struct blkio_group *blkg, u64 read_bps)
 {
 	struct throtl_data *td = key;
+	struct throtl_grp *tg = tg_of_blkg(blkg);
 
-	tg_of_blkg(blkg)->bps[READ] = read_bps;
-	/* Make sure read_bps is updated before setting limits_changed */
-	smp_wmb();
-	tg_of_blkg(blkg)->limits_changed = true;
-
-	/* Make sure tg->limits_changed is updated before td->limits_changed */
-	smp_mb__before_atomic_inc();
-	atomic_inc(&td->limits_changed);
-	smp_mb__after_atomic_inc();
-
-	/* Schedule a work now to process the limit change */
-	throtl_schedule_delayed_work(td, 0);
+	tg->bps[READ] = read_bps;
+	throtl_update_blkio_group_common(td, tg);
 }
 
 static void throtl_update_blkio_group_write_bps(void *key,
 		struct blkio_group *blkg, u64 write_bps)
 {
 	struct throtl_data *td = key;
+	struct throtl_grp *tg = tg_of_blkg(blkg);
 
-	tg_of_blkg(blkg)->bps[WRITE] = write_bps;
-	smp_wmb();
-	tg_of_blkg(blkg)->limits_changed = true;
-	smp_mb__before_atomic_inc();
-	atomic_inc(&td->limits_changed);
-	smp_mb__after_atomic_inc();
-	throtl_schedule_delayed_work(td, 0);
+	tg->bps[WRITE] = write_bps;
+	throtl_update_blkio_group_common(td, tg);
 }
 
 static void throtl_update_blkio_group_read_iops(void *key,
 		struct blkio_group *blkg, unsigned int read_iops)
 {
 	struct throtl_data *td = key;
+	struct throtl_grp *tg = tg_of_blkg(blkg);
 
-	tg_of_blkg(blkg)->iops[READ] = read_iops;
-	smp_wmb();
-	tg_of_blkg(blkg)->limits_changed = true;
-	smp_mb__before_atomic_inc();
-	atomic_inc(&td->limits_changed);
-	smp_mb__after_atomic_inc();
-	throtl_schedule_delayed_work(td, 0);
+	tg->iops[READ] = read_iops;
+	throtl_update_blkio_group_common(td, tg);
 }
 
 static void throtl_update_blkio_group_write_iops(void *key,
 		struct blkio_group *blkg, unsigned int write_iops)
 {
 	struct throtl_data *td = key;
+	struct throtl_grp *tg = tg_of_blkg(blkg);
 
-	tg_of_blkg(blkg)->iops[WRITE] = write_iops;
-	smp_wmb();
-	tg_of_blkg(blkg)->limits_changed = true;
-	smp_mb__before_atomic_inc();
-	atomic_inc(&td->limits_changed);
-	smp_mb__after_atomic_inc();
-	throtl_schedule_delayed_work(td, 0);
+	tg->iops[WRITE] = write_iops;
+	throtl_update_blkio_group_common(td, tg);
 }
 
-void throtl_shutdown_timer_wq(struct request_queue *q)
+static void throtl_shutdown_wq(struct request_queue *q)
 {
 	struct throtl_data *td = q->td;
 
@@ -1009,20 +1003,28 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop)
 		/*
 		 * There is already another bio queued in same dir. No
 		 * need to update dispatch time.
-		 * Still update the disptime if rate limits on this group
-		 * were changed.
 		 */
-		if (!tg->limits_changed)
-			update_disptime = false;
-		else
-			tg->limits_changed = false;
-
+		update_disptime = false;
 		goto queue_bio;
+
 	}
 
 	/* Bio is with-in rate limit of group */
 	if (tg_may_dispatch(td, tg, bio, NULL)) {
 		throtl_charge_bio(tg, bio);
+
+		/*
+		 * We need to trim slice even when bios are not being queued
+		 * otherwise it might happen that a bio is not queued for
+		 * a long time and slice keeps on extending and trim is not
+		 * called for a long time. Now if limits are reduced suddenly
+		 * we take into account all the IO dispatched so far at new
+		 * low rate and * newly queued IO gets a really long dispatch
+		 * time.
+		 *
+		 * So keep on trimming slice even if bio is not queued.
+		 */
+		throtl_trim_slice(td, tg, rw);
 		goto out;
 	}
 
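To make the effect of the new throtl_trim_slice() call concrete (illustrative numbers, not from the patch): if a group has been dispatching within a 100 MB/s limit and its slice has quietly stretched to cover 200 MB of already-issued IO, dropping the limit to 1 MB/s without trimming would charge the next queued bio against that whole window at the new rate, giving it a dispatch time on the order of 200 seconds; trimming the slice on every dispatch keeps the accounted window short, so a lowered limit takes effect almost immediately.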
@@ -1058,7 +1060,7 @@ int blk_throtl_init(struct request_queue *q)
 
 	INIT_HLIST_HEAD(&td->tg_list);
 	td->tg_service_tree = THROTL_RB_ROOT;
-	atomic_set(&td->limits_changed, 0);
+	td->limits_changed = false;
 
 	/* Init root group */
 	tg = &td->root_tg;
@@ -1070,6 +1072,7 @@ int blk_throtl_init(struct request_queue *q)
 	/* Practically unlimited BW */
 	tg->bps[0] = tg->bps[1] = -1;
 	tg->iops[0] = tg->iops[1] = -1;
+	td->limits_changed = false;
 
 	/*
 	 * Set root group reference to 2. One reference will be dropped when
@@ -1102,7 +1105,7 @@ void blk_throtl_exit(struct request_queue *q)
 
 	BUG_ON(!td);
 
-	throtl_shutdown_timer_wq(q);
+	throtl_shutdown_wq(q);
 
 	spin_lock_irq(q->queue_lock);
 	throtl_release_tgs(td);
@@ -1132,7 +1135,7 @@ void blk_throtl_exit(struct request_queue *q)
 	 * update limits through cgroup and another work got queued, cancel
 	 * it.
 	 */
-	throtl_shutdown_timer_wq(q);
+	throtl_shutdown_wq(q);
 	throtl_td_free(td);
 }
 