diff options
Diffstat (limited to 'block/blk-throttle.c')
-rw-r--r-- | block/blk-throttle.c | 128 |
1 files changed, 113 insertions, 15 deletions
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index cb259bc46f43..27f7960dd421 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -40,6 +40,14 @@ struct throtl_rb_root { | |||
40 | 40 | ||
41 | #define rb_entry_tg(node) rb_entry((node), struct throtl_grp, rb_node) | 41 | #define rb_entry_tg(node) rb_entry((node), struct throtl_grp, rb_node) |
42 | 42 | ||
43 | /* Per-cpu group stats */ | ||
44 | struct tg_stats_cpu { | ||
45 | /* total bytes transferred */ | ||
46 | struct blkg_rwstat service_bytes; | ||
47 | /* total IOs serviced, post merge */ | ||
48 | struct blkg_rwstat serviced; | ||
49 | }; | ||
50 | |||
43 | struct throtl_grp { | 51 | struct throtl_grp { |
44 | /* active throtl group service_tree member */ | 52 | /* active throtl group service_tree member */ |
45 | struct rb_node rb_node; | 53 | struct rb_node rb_node; |
@@ -76,6 +84,12 @@ struct throtl_grp { | |||
76 | 84 | ||
77 | /* Some throttle limits got updated for the group */ | 85 | /* Some throttle limits got updated for the group */ |
78 | int limits_changed; | 86 | int limits_changed; |
87 | |||
88 | /* Per cpu stats pointer */ | ||
89 | struct tg_stats_cpu __percpu *stats_cpu; | ||
90 | |||
91 | /* List of tgs waiting for per cpu stats memory to be allocated */ | ||
92 | struct list_head stats_alloc_node; | ||
79 | }; | 93 | }; |
80 | 94 | ||
81 | struct throtl_data | 95 | struct throtl_data |
@@ -100,6 +114,13 @@ struct throtl_data | |||
100 | int limits_changed; | 114 | int limits_changed; |
101 | }; | 115 | }; |
102 | 116 | ||
117 | /* list and work item to allocate percpu group stats */ | ||
118 | static DEFINE_SPINLOCK(tg_stats_alloc_lock); | ||
119 | static LIST_HEAD(tg_stats_alloc_list); | ||
120 | |||
121 | static void tg_stats_alloc_fn(struct work_struct *); | ||
122 | static DECLARE_DELAYED_WORK(tg_stats_alloc_work, tg_stats_alloc_fn); | ||
123 | |||
103 | static inline struct throtl_grp *blkg_to_tg(struct blkio_group *blkg) | 124 | static inline struct throtl_grp *blkg_to_tg(struct blkio_group *blkg) |
104 | { | 125 | { |
105 | return blkg_to_pdata(blkg, &blkio_policy_throtl); | 126 | return blkg_to_pdata(blkg, &blkio_policy_throtl); |
@@ -142,6 +163,44 @@ static inline unsigned int total_nr_queued(struct throtl_data *td) | |||
142 | return td->nr_queued[0] + td->nr_queued[1]; | 163 | return td->nr_queued[0] + td->nr_queued[1]; |
143 | } | 164 | } |
144 | 165 | ||
166 | /* | ||
167 | * Worker for allocating per cpu stat for tgs. This is scheduled on the | ||
168 | * system_nrt_wq once there are some groups on the alloc_list waiting for | ||
169 | * allocation. | ||
170 | */ | ||
171 | static void tg_stats_alloc_fn(struct work_struct *work) | ||
172 | { | ||
173 | static struct tg_stats_cpu *stats_cpu; /* this fn is non-reentrant */ | ||
174 | struct delayed_work *dwork = to_delayed_work(work); | ||
175 | bool empty = false; | ||
176 | |||
177 | alloc_stats: | ||
178 | if (!stats_cpu) { | ||
179 | stats_cpu = alloc_percpu(struct tg_stats_cpu); | ||
180 | if (!stats_cpu) { | ||
181 | /* allocation failed, try again after some time */ | ||
182 | queue_delayed_work(system_nrt_wq, dwork, | ||
183 | msecs_to_jiffies(10)); | ||
184 | return; | ||
185 | } | ||
186 | } | ||
187 | |||
188 | spin_lock_irq(&tg_stats_alloc_lock); | ||
189 | |||
190 | if (!list_empty(&tg_stats_alloc_list)) { | ||
191 | struct throtl_grp *tg = list_first_entry(&tg_stats_alloc_list, | ||
192 | struct throtl_grp, | ||
193 | stats_alloc_node); | ||
194 | swap(tg->stats_cpu, stats_cpu); | ||
195 | list_del_init(&tg->stats_alloc_node); | ||
196 | } | ||
197 | |||
198 | empty = list_empty(&tg_stats_alloc_list); | ||
199 | spin_unlock_irq(&tg_stats_alloc_lock); | ||
200 | if (!empty) | ||
201 | goto alloc_stats; | ||
202 | } | ||
203 | |||
145 | static void throtl_init_blkio_group(struct blkio_group *blkg) | 204 | static void throtl_init_blkio_group(struct blkio_group *blkg) |
146 | { | 205 | { |
147 | struct throtl_grp *tg = blkg_to_tg(blkg); | 206 | struct throtl_grp *tg = blkg_to_tg(blkg); |
@@ -155,6 +214,43 @@ static void throtl_init_blkio_group(struct blkio_group *blkg) | |||
155 | tg->bps[WRITE] = -1; | 214 | tg->bps[WRITE] = -1; |
156 | tg->iops[READ] = -1; | 215 | tg->iops[READ] = -1; |
157 | tg->iops[WRITE] = -1; | 216 | tg->iops[WRITE] = -1; |
217 | |||
218 | /* | ||
219 | * Ugh... We need to perform per-cpu allocation for tg->stats_cpu | ||
220 | * but percpu allocator can't be called from IO path. Queue tg on | ||
221 | * tg_stats_alloc_list and allocate from work item. | ||
222 | */ | ||
223 | spin_lock(&tg_stats_alloc_lock); | ||
224 | list_add(&tg->stats_alloc_node, &tg_stats_alloc_list); | ||
225 | queue_delayed_work(system_nrt_wq, &tg_stats_alloc_work, 0); | ||
226 | spin_unlock(&tg_stats_alloc_lock); | ||
227 | } | ||
228 | |||
229 | static void throtl_exit_blkio_group(struct blkio_group *blkg) | ||
230 | { | ||
231 | struct throtl_grp *tg = blkg_to_tg(blkg); | ||
232 | |||
233 | spin_lock(&tg_stats_alloc_lock); | ||
234 | list_del_init(&tg->stats_alloc_node); | ||
235 | spin_unlock(&tg_stats_alloc_lock); | ||
236 | |||
237 | free_percpu(tg->stats_cpu); | ||
238 | } | ||
239 | |||
240 | static void throtl_reset_group_stats(struct blkio_group *blkg) | ||
241 | { | ||
242 | struct throtl_grp *tg = blkg_to_tg(blkg); | ||
243 | int cpu; | ||
244 | |||
245 | if (tg->stats_cpu == NULL) | ||
246 | return; | ||
247 | |||
248 | for_each_possible_cpu(cpu) { | ||
249 | struct tg_stats_cpu *sc = per_cpu_ptr(tg->stats_cpu, cpu); | ||
250 | |||
251 | blkg_rwstat_reset(&sc->service_bytes); | ||
252 | blkg_rwstat_reset(&sc->serviced); | ||
253 | } | ||
158 | } | 254 | } |
159 | 255 | ||
160 | static struct | 256 | static struct |
@@ -565,12 +661,12 @@ static bool tg_may_dispatch(struct throtl_data *td, struct throtl_grp *tg, | |||
565 | static void throtl_update_dispatch_stats(struct blkio_group *blkg, u64 bytes, | 661 | static void throtl_update_dispatch_stats(struct blkio_group *blkg, u64 bytes, |
566 | int rw) | 662 | int rw) |
567 | { | 663 | { |
568 | struct blkg_policy_data *pd = blkg->pd[BLKIO_POLICY_THROTL]; | 664 | struct throtl_grp *tg = blkg_to_tg(blkg); |
569 | struct blkio_group_stats_cpu *stats_cpu; | 665 | struct tg_stats_cpu *stats_cpu; |
570 | unsigned long flags; | 666 | unsigned long flags; |
571 | 667 | ||
572 | /* If per cpu stats are not allocated yet, don't do any accounting. */ | 668 | /* If per cpu stats are not allocated yet, don't do any accounting. */ |
573 | if (pd->stats_cpu == NULL) | 669 | if (tg->stats_cpu == NULL) |
574 | return; | 670 | return; |
575 | 671 | ||
576 | /* | 672 | /* |
@@ -580,7 +676,7 @@ static void throtl_update_dispatch_stats(struct blkio_group *blkg, u64 bytes, | |||
580 | */ | 676 | */ |
581 | local_irq_save(flags); | 677 | local_irq_save(flags); |
582 | 678 | ||
583 | stats_cpu = this_cpu_ptr(pd->stats_cpu); | 679 | stats_cpu = this_cpu_ptr(tg->stats_cpu); |
584 | 680 | ||
585 | blkg_rwstat_add(&stats_cpu->serviced, rw, 1); | 681 | blkg_rwstat_add(&stats_cpu->serviced, rw, 1); |
586 | blkg_rwstat_add(&stats_cpu->service_bytes, rw, bytes); | 682 | blkg_rwstat_add(&stats_cpu->service_bytes, rw, bytes); |
@@ -842,15 +938,15 @@ static void throtl_update_blkio_group_common(struct throtl_data *td, | |||
842 | throtl_schedule_delayed_work(td, 0); | 938 | throtl_schedule_delayed_work(td, 0); |
843 | } | 939 | } |
844 | 940 | ||
845 | static u64 blkg_prfill_cpu_rwstat(struct seq_file *sf, | 941 | static u64 tg_prfill_cpu_rwstat(struct seq_file *sf, |
846 | struct blkg_policy_data *pd, int off) | 942 | struct blkg_policy_data *pd, int off) |
847 | { | 943 | { |
944 | struct throtl_grp *tg = (void *)pd->pdata; | ||
848 | struct blkg_rwstat rwstat = { }, tmp; | 945 | struct blkg_rwstat rwstat = { }, tmp; |
849 | int i, cpu; | 946 | int i, cpu; |
850 | 947 | ||
851 | for_each_possible_cpu(cpu) { | 948 | for_each_possible_cpu(cpu) { |
852 | struct blkio_group_stats_cpu *sc = | 949 | struct tg_stats_cpu *sc = per_cpu_ptr(tg->stats_cpu, cpu); |
853 | per_cpu_ptr(pd->stats_cpu, cpu); | ||
854 | 950 | ||
855 | tmp = blkg_rwstat_read((void *)sc + off); | 951 | tmp = blkg_rwstat_read((void *)sc + off); |
856 | for (i = 0; i < BLKG_RWSTAT_NR; i++) | 952 | for (i = 0; i < BLKG_RWSTAT_NR; i++) |
@@ -861,12 +957,12 @@ static u64 blkg_prfill_cpu_rwstat(struct seq_file *sf, | |||
861 | } | 957 | } |
862 | 958 | ||
863 | /* print per-cpu blkg_rwstat specified by BLKCG_STAT_PRIV() */ | 959 | /* print per-cpu blkg_rwstat specified by BLKCG_STAT_PRIV() */ |
864 | static int blkcg_print_cpu_rwstat(struct cgroup *cgrp, struct cftype *cft, | 960 | static int tg_print_cpu_rwstat(struct cgroup *cgrp, struct cftype *cft, |
865 | struct seq_file *sf) | 961 | struct seq_file *sf) |
866 | { | 962 | { |
867 | struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); | 963 | struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); |
868 | 964 | ||
869 | blkcg_print_blkgs(sf, blkcg, blkg_prfill_cpu_rwstat, | 965 | blkcg_print_blkgs(sf, blkcg, tg_prfill_cpu_rwstat, |
870 | BLKCG_STAT_POL(cft->private), | 966 | BLKCG_STAT_POL(cft->private), |
871 | BLKCG_STAT_OFF(cft->private), true); | 967 | BLKCG_STAT_OFF(cft->private), true); |
872 | return 0; | 968 | return 0; |
@@ -1012,14 +1108,14 @@ static struct cftype throtl_files[] = { | |||
1012 | { | 1108 | { |
1013 | .name = "throttle.io_service_bytes", | 1109 | .name = "throttle.io_service_bytes", |
1014 | .private = BLKCG_STAT_PRIV(BLKIO_POLICY_THROTL, | 1110 | .private = BLKCG_STAT_PRIV(BLKIO_POLICY_THROTL, |
1015 | offsetof(struct blkio_group_stats_cpu, service_bytes)), | 1111 | offsetof(struct tg_stats_cpu, service_bytes)), |
1016 | .read_seq_string = blkcg_print_cpu_rwstat, | 1112 | .read_seq_string = tg_print_cpu_rwstat, |
1017 | }, | 1113 | }, |
1018 | { | 1114 | { |
1019 | .name = "throttle.io_serviced", | 1115 | .name = "throttle.io_serviced", |
1020 | .private = BLKCG_STAT_PRIV(BLKIO_POLICY_THROTL, | 1116 | .private = BLKCG_STAT_PRIV(BLKIO_POLICY_THROTL, |
1021 | offsetof(struct blkio_group_stats_cpu, serviced)), | 1117 | offsetof(struct tg_stats_cpu, serviced)), |
1022 | .read_seq_string = blkcg_print_cpu_rwstat, | 1118 | .read_seq_string = tg_print_cpu_rwstat, |
1023 | }, | 1119 | }, |
1024 | { } /* terminate */ | 1120 | { } /* terminate */ |
1025 | }; | 1121 | }; |
@@ -1034,6 +1130,8 @@ static void throtl_shutdown_wq(struct request_queue *q) | |||
1034 | static struct blkio_policy_type blkio_policy_throtl = { | 1130 | static struct blkio_policy_type blkio_policy_throtl = { |
1035 | .ops = { | 1131 | .ops = { |
1036 | .blkio_init_group_fn = throtl_init_blkio_group, | 1132 | .blkio_init_group_fn = throtl_init_blkio_group, |
1133 | .blkio_exit_group_fn = throtl_exit_blkio_group, | ||
1134 | .blkio_reset_group_stats_fn = throtl_reset_group_stats, | ||
1037 | }, | 1135 | }, |
1038 | .plid = BLKIO_POLICY_THROTL, | 1136 | .plid = BLKIO_POLICY_THROTL, |
1039 | .pdata_size = sizeof(struct throtl_grp), | 1137 | .pdata_size = sizeof(struct throtl_grp), |