Diffstat (limited to 'kernel/sched/pelt.c')
-rw-r--r--   kernel/sched/pelt.c   311
1 file changed, 311 insertions, 0 deletions
diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c
new file mode 100644
index 000000000000..e6ecbb2b8698
--- /dev/null
+++ b/kernel/sched/pelt.c
@@ -0,0 +1,311 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Per Entity Load Tracking
 *
 * Copyright (C) 2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
 *
 * Interactivity improvements by Mike Galbraith
 * (C) 2007 Mike Galbraith <efault@gmx.de>
 *
 * Various enhancements by Dmitry Adamushko.
 * (C) 2007 Dmitry Adamushko <dmitry.adamushko@gmail.com>
 *
 * Group scheduling enhancements by Srivatsa Vaddagiri
 * Copyright IBM Corporation, 2007
 * Author: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
 *
 * Scaled math optimizations by Thomas Gleixner
 * Copyright (C) 2007, Thomas Gleixner <tglx@linutronix.de>
 *
 * Adaptive scheduling granularity, math enhancements by Peter Zijlstra
 * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
 *
 * Move PELT related code from fair.c into this pelt.c file
 * Author: Vincent Guittot <vincent.guittot@linaro.org>
 */

#include <linux/sched.h>
#include "sched.h"
#include "sched-pelt.h"
#include "pelt.h"

/*
 * Approximate:
 *   val * y^n, where y^32 ~= 0.5 (~1 scheduling period)
 */
static u64 decay_load(u64 val, u64 n)
{
        unsigned int local_n;

        if (unlikely(n > LOAD_AVG_PERIOD * 63))
                return 0;

        /* after bounds checking we can collapse to 32-bit */
        local_n = n;

        /*
         * As y^PERIOD = 1/2, we can combine
         *    y^n = 1/2^(n/PERIOD) * y^(n%PERIOD)
         * With a look-up table which covers y^n (n<PERIOD)
         *
         * To achieve constant time decay_load.
         */
        if (unlikely(local_n >= LOAD_AVG_PERIOD)) {
                val >>= local_n / LOAD_AVG_PERIOD;
                local_n %= LOAD_AVG_PERIOD;
        }

        val = mul_u64_u32_shr(val, runnable_avg_yN_inv[local_n], 32);
        return val;
}
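To make the constant-time decomposition concrete, here is a small worked example (assuming LOAD_AVG_PERIOD == 32 as defined in sched-pelt.h; the input n = 100 is made up for illustration):

\[
\mathrm{decay\_load}(val, 100)
  \approx \frac{val}{2^{\lfloor 100/32 \rfloor}} \cdot y^{100 \bmod 32}
  = \frac{val}{8} \cdot y^{4},
\qquad y = 2^{-1/32} \approx 0.97857,
\]

where the right shift handles the \(2^{-\lfloor n/32 \rfloor}\) factor and runnable_avg_yN_inv[4] supplies \(y^4\) as a 32-bit fixed-point multiplier.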

static u32 __accumulate_pelt_segments(u64 periods, u32 d1, u32 d3)
{
        u32 c1, c2, c3 = d3; /* y^0 == 1 */

        /*
         * c1 = d1 y^p
         */
        c1 = decay_load((u64)d1, periods);

        /*
         *            p-1
         * c2 = 1024 \Sum y^n
         *            n=1
         *
         *              inf        inf
         *    = 1024 ( \Sum y^n - \Sum y^n - y^0 )
         *              n=0        n=p
         */
        c2 = LOAD_AVG_MAX - decay_load(LOAD_AVG_MAX, periods) - 1024;

        return c1 + c2 + c3;
}
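The c2 expression mirrors the identity sketched in the comment above; a short derivation, taking LOAD_AVG_MAX as the precomputed value of \(1024\sum_{n=0}^{\infty} y^n\):

\[
1024 \sum_{n=1}^{p-1} y^n
  = 1024\left(\sum_{n=0}^{\infty} y^n - \sum_{n=p}^{\infty} y^n - y^0\right)
  = \mathrm{LOAD\_AVG\_MAX} - y^p\,\mathrm{LOAD\_AVG\_MAX} - 1024,
\]

and \(y^p\,\mathrm{LOAD\_AVG\_MAX}\) is exactly what decay_load(LOAD_AVG_MAX, periods) computes.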

#define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT)

/*
 * Accumulate the three separate parts of the sum; d1 the remainder
 * of the last (incomplete) period, d2 the span of full periods and d3
 * the remainder of the (incomplete) current period.
 *
 *           d1          d2           d3
 *           ^           ^            ^
 *           |           |            |
 *         |<->|<----------------->|<--->|
 * ... |---x---|------| ... |------|-----x (now)
 *
 *                           p-1
 * u' = (u + d1) y^p + 1024 \Sum y^n + d3 y^0
 *                           n=1
 *
 *    = u y^p +                                 (Step 1)
 *
 *                     p-1
 *      d1 y^p + 1024 \Sum y^n + d3 y^0         (Step 2)
 *                     n=1
 */
static __always_inline u32
accumulate_sum(u64 delta, int cpu, struct sched_avg *sa,
               unsigned long load, unsigned long runnable, int running)
{
        unsigned long scale_freq, scale_cpu;
        u32 contrib = (u32)delta; /* p == 0 -> delta < 1024 */
        u64 periods;

        scale_freq = arch_scale_freq_capacity(cpu);
        scale_cpu = arch_scale_cpu_capacity(NULL, cpu);

        delta += sa->period_contrib;
        periods = delta / 1024; /* A period is 1024us (~1ms) */

        /*
         * Step 1: decay old *_sum if we crossed period boundaries.
         */
        if (periods) {
                sa->load_sum = decay_load(sa->load_sum, periods);
                sa->runnable_load_sum =
                        decay_load(sa->runnable_load_sum, periods);
                sa->util_sum = decay_load((u64)(sa->util_sum), periods);

                /*
                 * Step 2
                 */
                delta %= 1024;
                contrib = __accumulate_pelt_segments(periods,
                                1024 - sa->period_contrib, delta);
        }
        sa->period_contrib = delta;

        contrib = cap_scale(contrib, scale_freq);
        if (load)
                sa->load_sum += load * contrib;
        if (runnable)
                sa->runnable_load_sum += runnable * contrib;
        if (running)
                sa->util_sum += contrib * scale_cpu;

        return periods;
}
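As an illustrative aside, the following minimal userspace sketch reproduces only the period-splitting arithmetic above (the d1/periods/d3 decomposition); the values period_contrib = 300 and delta = 2000 are made up, and the decay and capacity scaling performed by the real accumulate_sum() are deliberately omitted:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t period_contrib = 300; /* hypothetical leftover of the previous period, in us */
        uint64_t delta = 2000;         /* hypothetical newly elapsed time, in us */

        uint64_t total = delta + period_contrib;
        uint64_t periods = total / 1024;     /* full 1024us periods crossed */
        uint32_t d1 = 1024 - period_contrib; /* tail of the old, incomplete period */
        uint32_t d3 = total % 1024;          /* head of the new, incomplete period */

        /* Expected output: periods=2 d1=724 d3=252 */
        printf("periods=%llu d1=%u d3=%u\n",
               (unsigned long long)periods, d1, d3);
        return 0;
}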

/*
 * We can represent the historical contribution to runnable average as the
 * coefficients of a geometric series. To do this we sub-divide our runnable
 * history into segments of approximately 1ms (1024us); label the segment that
 * occurred N-ms ago p_N, with p_0 corresponding to the current period, e.g.
 *
 * [<- 1024us ->|<- 1024us ->|<- 1024us ->| ...
 *       p0            p1           p2
 *      (now)       (~1ms ago)  (~2ms ago)
 *
 * Let u_i denote the fraction of p_i that the entity was runnable.
 *
 * We then designate the fractions u_i as our co-efficients, yielding the
 * following representation of historical load:
 *   u_0 + u_1*y + u_2*y^2 + u_3*y^3 + ...
 *
 * We choose y based on the width of a reasonable scheduling period, fixing:
 *   y^32 = 0.5
 *
 * This means that the contribution to load ~32ms ago (u_32) will be weighted
 * approximately half as much as the contribution to load within the last ms
 * (u_0).
 *
 * When a period "rolls over" and we have new u_0`, multiplying the previous
 * sum again by y is sufficient to update:
 *   load_avg = u_0` + y*(u_0 + u_1*y + u_2*y^2 + ... )
 *            = u_0 + u_1*y + u_2*y^2 + ... [re-labeling u_i --> u_{i+1}]
 */
static __always_inline int
___update_load_sum(u64 now, int cpu, struct sched_avg *sa,
                   unsigned long load, unsigned long runnable, int running)
{
        u64 delta;

        delta = now - sa->last_update_time;
        /*
         * This should only happen when time goes backwards, which it
         * unfortunately does during sched clock init when we swap over to TSC.
         */
        if ((s64)delta < 0) {
                sa->last_update_time = now;
                return 0;
        }

        /*
         * Use 1024ns as the unit of measurement since it's a reasonable
         * approximation of 1us and fast to compute.
         */
        delta >>= 10;
        if (!delta)
                return 0;

        sa->last_update_time += delta << 10;

        /*
         * running is a subset of runnable (weight) so running can't be set if
         * runnable is clear. But there are some corner cases where the current
         * se has been already dequeued but cfs_rq->curr still points to it.
         * This means that weight will be 0 but not running for a sched_entity
         * but also for a cfs_rq if the latter becomes idle. As an example,
         * this happens during idle_balance() which calls
         * update_blocked_averages().
         */
        if (!load)
                runnable = running = 0;

        /*
         * Now we know we crossed measurement unit boundaries. The *_avg
         * accrues by two steps:
         *
         * Step 1: accumulate *_sum since last_update_time. If we haven't
         * crossed period boundaries, finish.
         */
        if (!accumulate_sum(delta, cpu, sa, load, runnable, running))
                return 0;

        return 1;
}
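Two small worked notes on the constants used above; both follow from the definitions already stated in the comments rather than from additional sources. First, the choice y^32 = 0.5 gives

\[
y = 2^{-1/32} \approx 0.97857,
\qquad
1024\sum_{n=0}^{\infty} y^n = \frac{1024}{1-y} \approx 4.8\times 10^{4},
\]

which is the saturation value that the precomputed LOAD_AVG_MAX in sched-pelt.h approximates. Second, the measurement unit after delta >>= 10 is

\[
\Delta' = \left\lfloor \frac{\Delta_{ns}}{2^{10}} \right\rfloor,
\qquad 1\ \text{unit} = 1024\,\mathrm{ns} \approx 1.024\,\mu\mathrm{s},
\]

so the shift trades an exact division by 1000 for a cheap power of two, and advancing last_update_time by delta << 10 leaves the sub-unit remainder to be accounted in the next update.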

static __always_inline void
___update_load_avg(struct sched_avg *sa, unsigned long load, unsigned long runnable)
{
        u32 divider = LOAD_AVG_MAX - 1024 + sa->period_contrib;

        /*
         * Step 2: update *_avg.
         */
        sa->load_avg = div_u64(load * sa->load_sum, divider);
        sa->runnable_load_avg = div_u64(runnable * sa->runnable_load_sum, divider);
        sa->util_avg = sa->util_sum / divider;
}
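A hedged note on the divider: the largest value any *_sum can hold at this point is the fully saturated series for all completed periods plus the period_contrib microseconds already accrued in the current one, i.e.

\[
\mathrm{divider}
 = 1024\sum_{n=1}^{\infty} y^n + \mathrm{period\_contrib}
 = (\mathrm{LOAD\_AVG\_MAX} - 1024) + \mathrm{period\_contrib},
\]

so dividing by it normalizes util_avg to the CPU capacity scale and load_avg to the contributed weight.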

/*
 * sched_entity:
 *
 *   task:
 *     se_runnable() == se_weight()
 *
 *   group: [ see update_cfs_group() ]
 *     se_weight()   = tg->weight * grq->load_avg / tg->load_avg
 *     se_runnable() = se_weight(se) * grq->runnable_load_avg / grq->load_avg
 *
 *   load_sum := runnable_sum
 *   load_avg = se_weight(se) * runnable_avg
 *
 *   runnable_load_sum := runnable_sum
 *   runnable_load_avg = se_runnable(se) * runnable_avg
 *
 *   XXX collapse load_sum and runnable_load_sum
 *
 * cfs_rq:
 *
 *   load_sum = \Sum se_weight(se) * se->avg.load_sum
 *   load_avg = \Sum se->avg.load_avg
 *
 *   runnable_load_sum = \Sum se_runnable(se) * se->avg.runnable_load_sum
 *   runnable_load_avg = \Sum se->avg.runnable_load_avg
 */
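As a hedged numeric illustration of the task case above (the 50% figure is made up): a nice-0 task with se_weight() == 1024 that has been runnable for roughly half of recent history has runnable_avg of about 0.5, so

\[
\mathrm{load\_avg} = \mathrm{se\_weight}(se)\cdot\frac{\mathrm{load\_sum}}{\mathrm{divider}}
 \approx 1024 \times 0.5 = 512,
\]

and since se_runnable() == se_weight() for a task, runnable_load_avg comes out the same.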

int __update_load_avg_blocked_se(u64 now, int cpu, struct sched_entity *se)
{
        if (entity_is_task(se))
                se->runnable_weight = se->load.weight;

        if (___update_load_sum(now, cpu, &se->avg, 0, 0, 0)) {
                ___update_load_avg(&se->avg, se_weight(se), se_runnable(se));
                return 1;
        }

        return 0;
}

int __update_load_avg_se(u64 now, int cpu, struct cfs_rq *cfs_rq, struct sched_entity *se)
{
        if (entity_is_task(se))
                se->runnable_weight = se->load.weight;

        if (___update_load_sum(now, cpu, &se->avg, !!se->on_rq, !!se->on_rq,
                                cfs_rq->curr == se)) {

                ___update_load_avg(&se->avg, se_weight(se), se_runnable(se));
                cfs_se_util_change(&se->avg);
                return 1;
        }

        return 0;
}

int __update_load_avg_cfs_rq(u64 now, int cpu, struct cfs_rq *cfs_rq)
{
        if (___update_load_sum(now, cpu, &cfs_rq->avg,
                                scale_load_down(cfs_rq->load.weight),
                                scale_load_down(cfs_rq->runnable_weight),
                                cfs_rq->curr != NULL)) {

                ___update_load_avg(&cfs_rq->avg, 1, 1);
                return 1;
        }

        return 0;
}