| author | Paul Gortmaker <paul.gortmaker@windriver.com> | 2013-04-19 15:10:49 -0400 |
|---|---|---|
| committer | Ingo Molnar <mingo@kernel.org> | 2013-05-07 07:14:50 -0400 |
| commit | 45ceebf77653975815d82fcf7cec0a164215ae11 | |
| tree | 07171b93e073a58f80747885614451e0a5705728 /kernel/sched/proc.c | |
| parent | 534c97b0950b1967bca1c753aeaed32f5db40264 | |
sched: Factor out load calculation code from sched/core.c --> sched/proc.c
This large chunk of load calculation code can be easily divorced
from the main core.c scheduler file, with only a couple of
prototypes and externs added to a kernel/sched header.
Some recent commits expanded the code and the documentation of
it, making it large enough to warrant separation. For example,
see:
556061b, "sched/nohz: Fix rq->cpu_load[] calculations"
5aaa0b7, "sched/nohz: Fix rq->cpu_load calculations some more"
5167e8d, "sched/nohz: Rewrite and fix load-avg computation -- again"
More importantly, it helps reduce the size of the main
sched/core.c by yet another significant amount (~600 lines).
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/r/1366398650-31599-2-git-send-email-paul.gortmaker@windriver.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/sched/proc.c')
-rw-r--r-- kernel/sched/proc.c | 578
1 file changed, 578 insertions, 0 deletions
diff --git a/kernel/sched/proc.c b/kernel/sched/proc.c
new file mode 100644
index 000000000000..bb3a6a0b8623
--- /dev/null
+++ b/kernel/sched/proc.c
@@ -0,0 +1,578 @@
/*
 *  kernel/sched/proc.c
 *
 *  Kernel load calculations, forked from sched/core.c
 */

#include <linux/export.h>

#include "sched.h"

unsigned long this_cpu_load(void)
{
        struct rq *this = this_rq();
        return this->cpu_load[0];
}


/*
 * Global load-average calculations
 *
 * We take a distributed and async approach to calculating the global load-avg
 * in order to minimize overhead.
 *
 * The global load average is an exponentially decaying average of nr_running +
 * nr_uninterruptible.
 *
 * Once every LOAD_FREQ:
 *
 *   nr_active = 0;
 *   for_each_possible_cpu(cpu)
 *      nr_active += cpu_of(cpu)->nr_running + cpu_of(cpu)->nr_uninterruptible;
 *
 *   avenrun[n] = avenrun[0] * exp_n + nr_active * (1 - exp_n)
 *
 * Due to a number of reasons the above turns into the mess below:
 *
 *  - for_each_possible_cpu() is prohibitively expensive on machines with a
 *    serious number of cpus, therefore we need to take a distributed approach
 *    to calculating nr_active.
 *
 *        \Sum_i x_i(t) = \Sum_i x_i(t) - x_i(t_0) | x_i(t_0) := 0
 *                      = \Sum_i { \Sum_j=1 x_i(t_j) - x_i(t_j-1) }
 *
 *    So assuming nr_active := 0 when we start out -- true per definition, we
 *    can simply take per-cpu deltas and fold those into a global accumulate
 *    to obtain the same result. See calc_load_fold_active().
 *
 *    Furthermore, in order to avoid synchronizing all per-cpu delta folding
 *    across the machine, we assume 10 ticks is sufficient time for every
 *    cpu to have completed this task.
 *
 *    This places an upper-bound on the IRQ-off latency of the machine. Then
 *    again, being late doesn't lose the delta, just wrecks the sample.
 *
 *  - cpu_rq()->nr_uninterruptible isn't accurately tracked per-cpu because
 *    this would add another cross-cpu cacheline miss and atomic operation
 *    to the wakeup path. Instead we increment on whatever cpu the task ran
 *    when it went into uninterruptible state and decrement on whatever cpu
 *    did the wakeup. This means that only the sum of nr_uninterruptible over
 *    all cpus yields the correct result.
 *
 *  This covers the NO_HZ=n code, for extra headaches, see the comment below.
 */

/* Variables and functions for calc_load */
atomic_long_t calc_load_tasks;
unsigned long calc_load_update;
unsigned long avenrun[3];
EXPORT_SYMBOL(avenrun); /* should be removed */

/**
 * get_avenrun - get the load average array
 * @loads:  pointer to dest load array
 * @offset: offset to add
 * @shift:  shift count to shift the result left
 *
 * These values are estimates at best, so no need for locking.
 */
void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
{
        loads[0] = (avenrun[0] + offset) << shift;
        loads[1] = (avenrun[1] + offset) << shift;
        loads[2] = (avenrun[2] + offset) << shift;
}

long calc_load_fold_active(struct rq *this_rq)
{
        long nr_active, delta = 0;

        nr_active = this_rq->nr_running;
        nr_active += (long) this_rq->nr_uninterruptible;

        if (nr_active != this_rq->calc_load_active) {
                delta = nr_active - this_rq->calc_load_active;
                this_rq->calc_load_active = nr_active;
        }

        return delta;
}

/*
 * a1 = a0 * e + a * (1 - e)
 */
static unsigned long
calc_load(unsigned long load, unsigned long exp, unsigned long active)
{
        load *= exp;
        load += active * (FIXED_1 - exp);
        load += 1UL << (FSHIFT - 1);
        return load >> FSHIFT;
}

#ifdef CONFIG_NO_HZ_COMMON
/*
 * Handle NO_HZ for the global load-average.
 *
 * Since the above described distributed algorithm to compute the global
 * load-average relies on per-cpu sampling from the tick, it is affected by
 * NO_HZ.
 *
 * The basic idea is to fold the nr_active delta into a global idle-delta upon
 * entering NO_HZ state such that we can include this as an 'extra' cpu delta
 * when we read the global state.
 *
 * Obviously reality has to ruin such a delightfully simple scheme:
 *
 *  - When we go NO_HZ idle during the window, we can negate our sample
 *    contribution, causing under-accounting.
 *
 *    We avoid this by keeping two idle-delta counters and flipping them
 *    when the window starts, thus separating old and new NO_HZ load.
 *
 *    The only trick is the slight shift in index flip for read vs write.
 *
 *        0s            5s            10s           15s
 *          +10           +10           +10           +10
 *        |-|-----------|-|-----------|-|-----------|-|
 *    r:0 0 1           1 0           0 1           1 0
 *    w:0 1 1           0 0           1 1           0 0
 *
 *    This ensures we'll fold the old idle contribution in this window while
 *    accumulating the new one.
 *
 *  - When we wake up from NO_HZ idle during the window, we push up our
 *    contribution, since we effectively move our sample point to a known
 *    busy state.
 *
 *    This is solved by pushing the window forward, and thus skipping the
 *    sample, for this cpu (effectively using the idle-delta for this cpu which
 *    was in effect at the time the window opened). This also solves the issue
 *    of having to deal with a cpu having been in NOHZ idle for multiple
 *    LOAD_FREQ intervals.
 *
 * When making the ILB scale, we should try to pull this in as well.
 */
static atomic_long_t calc_load_idle[2];
static int calc_load_idx;

static inline int calc_load_write_idx(void)
{
        int idx = calc_load_idx;

        /*
         * See calc_global_nohz(), if we observe the new index, we also
         * need to observe the new update time.
         */
        smp_rmb();

        /*
         * If the folding window started, make sure we start writing in the
         * next idle-delta.
         */
        if (!time_before(jiffies, calc_load_update))
                idx++;

        return idx & 1;
}

static inline int calc_load_read_idx(void)
{
        return calc_load_idx & 1;
}

void calc_load_enter_idle(void)
{
        struct rq *this_rq = this_rq();
        long delta;

        /*
         * We're going into NOHZ mode, if there's any pending delta, fold it
         * into the pending idle delta.
         */
        delta = calc_load_fold_active(this_rq);
        if (delta) {
                int idx = calc_load_write_idx();
                atomic_long_add(delta, &calc_load_idle[idx]);
        }
}

void calc_load_exit_idle(void)
{
        struct rq *this_rq = this_rq();

        /*
         * If we're still before the sample window, we're done.
         */
        if (time_before(jiffies, this_rq->calc_load_update))
                return;

        /*
         * We woke inside or after the sample window, this means we're already
         * accounted through the nohz accounting, so skip the entire deal and
         * sync up for the next window.
         */
        this_rq->calc_load_update = calc_load_update;
        if (time_before(jiffies, this_rq->calc_load_update + 10))
                this_rq->calc_load_update += LOAD_FREQ;
}

static long calc_load_fold_idle(void)
{
        int idx = calc_load_read_idx();
        long delta = 0;

        if (atomic_long_read(&calc_load_idle[idx]))
                delta = atomic_long_xchg(&calc_load_idle[idx], 0);

        return delta;
}

/**
 * fixed_power_int - compute: x^n, in O(log n) time
 *
 * @x:         base of the power
 * @frac_bits: fractional bits of @x
 * @n:         power to raise @x to.
 *
 * By exploiting the relation between the definition of the natural power
 * function: x^n := x*x*...*x (x multiplied by itself for n times), and
 * the binary encoding of numbers used by computers: n := \Sum n_i * 2^i,
 * (where: n_i \elem {0, 1}, the binary vector representing n),
 * we find: x^n := x^(\Sum n_i * 2^i) := \Prod x^(n_i * 2^i), which is
 * of course trivially computable in O(log_2 n), the length of our binary
 * vector.
 */
static unsigned long
fixed_power_int(unsigned long x, unsigned int frac_bits, unsigned int n)
{
        unsigned long result = 1UL << frac_bits;

        if (n) for (;;) {
                if (n & 1) {
                        result *= x;
                        result += 1UL << (frac_bits - 1);
                        result >>= frac_bits;
                }
                n >>= 1;
                if (!n)
                        break;
                x *= x;
                x += 1UL << (frac_bits - 1);
                x >>= frac_bits;
        }

        return result;
}

/*
 * a1 = a0 * e + a * (1 - e)
 *
 * a2 = a1 * e + a * (1 - e)
 *    = (a0 * e + a * (1 - e)) * e + a * (1 - e)
 *    = a0 * e^2 + a * (1 - e) * (1 + e)
 *
 * a3 = a2 * e + a * (1 - e)
 *    = (a0 * e^2 + a * (1 - e) * (1 + e)) * e + a * (1 - e)
 *    = a0 * e^3 + a * (1 - e) * (1 + e + e^2)
 *
 *  ...
 *
 * an = a0 * e^n + a * (1 - e) * (1 + e + ... + e^n-1) [1]
 *    = a0 * e^n + a * (1 - e) * (1 - e^n)/(1 - e)
 *    = a0 * e^n + a * (1 - e^n)
 *
 * [1] application of the geometric series:
 *
 *              n         1 - x^(n+1)
 *     S_n := \Sum x^i = -------------
 *             i=0          1 - x
 */
static unsigned long
calc_load_n(unsigned long load, unsigned long exp,
            unsigned long active, unsigned int n)
{

        return calc_load(load, fixed_power_int(exp, FSHIFT, n), active);
}

/*
 * NO_HZ can leave us missing all per-cpu ticks calling
 * calc_load_account_active(), but since an idle CPU folds its delta into
 * calc_load_tasks_idle per calc_load_account_idle(), all we need to do is fold
 * in the pending idle delta if our idle period crossed a load cycle boundary.
 *
 * Once we've updated the global active value, we need to apply the exponential
 * weights adjusted to the number of cycles missed.
 */
static void calc_global_nohz(void)
{
        long delta, active, n;

        if (!time_before(jiffies, calc_load_update + 10)) {
                /*
                 * Catch-up, fold however many we are behind still
                 */
                delta = jiffies - calc_load_update - 10;
                n = 1 + (delta / LOAD_FREQ);

                active = atomic_long_read(&calc_load_tasks);
                active = active > 0 ? active * FIXED_1 : 0;

                avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n);
                avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n);
                avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n);

                calc_load_update += n * LOAD_FREQ;
        }

        /*
         * Flip the idle index...
         *
         * Make sure we first write the new time then flip the index, so that
         * calc_load_write_idx() will see the new time when it reads the new
         * index, this avoids a double flip messing things up.
         */
        smp_wmb();
        calc_load_idx++;
}
#else /* !CONFIG_NO_HZ_COMMON */

static inline long calc_load_fold_idle(void) { return 0; }
static inline void calc_global_nohz(void) { }

#endif /* CONFIG_NO_HZ_COMMON */

/*
 * calc_global_load - update the avenrun load estimates 10 ticks after the
 * CPUs have updated calc_load_tasks.
 */
void calc_global_load(unsigned long ticks)
{
        long active, delta;

        if (time_before(jiffies, calc_load_update + 10))
                return;

        /*
         * Fold the 'old' idle-delta to include all NO_HZ cpus.
         */
        delta = calc_load_fold_idle();
        if (delta)
                atomic_long_add(delta, &calc_load_tasks);

        active = atomic_long_read(&calc_load_tasks);
        active = active > 0 ? active * FIXED_1 : 0;

        avenrun[0] = calc_load(avenrun[0], EXP_1, active);
        avenrun[1] = calc_load(avenrun[1], EXP_5, active);
        avenrun[2] = calc_load(avenrun[2], EXP_15, active);

        calc_load_update += LOAD_FREQ;

        /*
         * In case we idled for multiple LOAD_FREQ intervals, catch up in bulk.
         */
        calc_global_nohz();
}

/*
 * Called from update_cpu_load() to periodically update this CPU's
 * active count.
 */
static void calc_load_account_active(struct rq *this_rq)
{
        long delta;

        if (time_before(jiffies, this_rq->calc_load_update))
                return;

        delta = calc_load_fold_active(this_rq);
        if (delta)
                atomic_long_add(delta, &calc_load_tasks);

        this_rq->calc_load_update += LOAD_FREQ;
}

/*
 * End of global load-average stuff
 */

/*
 * The exact cpuload at various idx values, calculated at every tick would be
 * load = (2^idx - 1) / 2^idx * load + 1 / 2^idx * cur_load
 *
 * If a cpu misses updates for n-1 ticks (as it was idle) and update gets called
 * on the nth tick when the cpu may be busy, then we have:
 * load = ((2^idx - 1) / 2^idx)^(n-1) * load
 * load = ((2^idx - 1) / 2^idx) * load + 1 / 2^idx * cur_load
 *
 * decay_load_missed() below does efficient calculation of
 * load = ((2^idx - 1) / 2^idx)^(n-1) * load
 * avoiding 0..n-1 loop doing load = ((2^idx - 1) / 2^idx) * load
 *
 * The calculation is approximated on a 128 point scale.
 * degrade_zero_ticks is the number of ticks after which load at any
 * particular idx is approximated to be zero.
 * degrade_factor is a precomputed table, a row for each load idx.
 * Each column corresponds to the degradation factor for a power of two ticks,
 * based on 128 point scale.
 * Example:
 * row 2, col 3 (=12) says that the degradation at load idx 2 after
 * 8 ticks is 12/128 (which is an approximation of the exact factor 3^8/4^8).
 *
 * With these power-of-2 load factors, we can degrade the load n times
 * by looking at 1 bits in n and doing as many mult/shift instead of
 * n mult/shifts needed by the exact degradation.
 */
#define DEGRADE_SHIFT           7
static const unsigned char
                degrade_zero_ticks[CPU_LOAD_IDX_MAX] = {0, 8, 32, 64, 128};
static const unsigned char
                degrade_factor[CPU_LOAD_IDX_MAX][DEGRADE_SHIFT + 1] = {
                                        {0, 0, 0, 0, 0, 0, 0, 0},
                                        {64, 32, 8, 0, 0, 0, 0, 0},
                                        {96, 72, 40, 12, 1, 0, 0},
                                        {112, 98, 75, 43, 15, 1, 0},
                                        {120, 112, 98, 76, 45, 16, 2} };

/*
 * Update cpu_load for any missed ticks, due to tickless idle. The backlog
 * would be when CPU is idle and so we just decay the old load without
 * adding any new load.
 */
static unsigned long
decay_load_missed(unsigned long load, unsigned long missed_updates, int idx)
{
        int j = 0;

        if (!missed_updates)
                return load;

        if (missed_updates >= degrade_zero_ticks[idx])
                return 0;

        if (idx == 1)
                return load >> missed_updates;

        while (missed_updates) {
                if (missed_updates % 2)
                        load = (load * degrade_factor[idx][j]) >> DEGRADE_SHIFT;

                missed_updates >>= 1;
                j++;
        }
        return load;
}

/*
 * Update rq->cpu_load[] statistics. This function is usually called every
 * scheduler tick (TICK_NSEC). With tickless idle this will not be called
 * every tick. We fix it up based on jiffies.
 */
static void __update_cpu_load(struct rq *this_rq, unsigned long this_load,
                              unsigned long pending_updates)
{
        int i, scale;

        this_rq->nr_load_updates++;

        /* Update our load: */
        this_rq->cpu_load[0] = this_load; /* Fasttrack for idx 0 */
        for (i = 1, scale = 2; i < CPU_LOAD_IDX_MAX; i++, scale += scale) {
                unsigned long old_load, new_load;

                /* scale is effectively 1 << i now, and >> i divides by scale */

                old_load = this_rq->cpu_load[i];
                old_load = decay_load_missed(old_load, pending_updates - 1, i);
                new_load = this_load;
                /*
                 * Round up the averaging division if load is increasing. This
                 * prevents us from getting stuck on 9 if the load is 10, for
                 * example.
                 */
                if (new_load > old_load)
                        new_load += scale - 1;

                this_rq->cpu_load[i] = (old_load * (scale - 1) + new_load) >> i;
        }

        sched_avg_update(this_rq);
}

#ifdef CONFIG_NO_HZ_COMMON
/*
 * There is no sane way to deal with nohz on smp when using jiffies because the
 * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading
 * causing off-by-one errors in observed deltas; {0,2} instead of {1,1}.
 *
 * Therefore we cannot use the delta approach from the regular tick since that
 * would seriously skew the load calculation. However we'll make do for those
 * updates happening while idle (nohz_idle_balance) or coming out of idle
 * (tick_nohz_idle_exit).
 *
 * This means we might still be one tick off for nohz periods.
 */

/*
 * Called from nohz_idle_balance() to update the load ratings before doing the
 * idle balance.
 */
void update_idle_cpu_load(struct rq *this_rq)
{
        unsigned long curr_jiffies = ACCESS_ONCE(jiffies);
        unsigned long load = this_rq->load.weight;
        unsigned long pending_updates;

        /*
         * bail if there's load or we're actually up-to-date.
         */
        if (load || curr_jiffies == this_rq->last_load_update_tick)
                return;

        pending_updates = curr_jiffies - this_rq->last_load_update_tick;
        this_rq->last_load_update_tick = curr_jiffies;

        __update_cpu_load(this_rq, load, pending_updates);
}

/*
 * Called from tick_nohz_idle_exit() -- try and fix up the ticks we missed.
 */
void update_cpu_load_nohz(void)
{
        struct rq *this_rq = this_rq();
        unsigned long curr_jiffies = ACCESS_ONCE(jiffies);
        unsigned long pending_updates;

        if (curr_jiffies == this_rq->last_load_update_tick)
                return;

        raw_spin_lock(&this_rq->lock);
        pending_updates = curr_jiffies - this_rq->last_load_update_tick;
        if (pending_updates) {
                this_rq->last_load_update_tick = curr_jiffies;
                /*
                 * We were idle, this means load 0, the current load might be
                 * !0 due to remote wakeups and the sort.
                 */
                __update_cpu_load(this_rq, 0, pending_updates);
        }
        raw_spin_unlock(&this_rq->lock);
}
#endif /* CONFIG_NO_HZ_COMMON */

/*
 * Called from scheduler_tick()
 */
void update_cpu_load_active(struct rq *this_rq)
{
        /*
         * See the mess around update_idle_cpu_load() / update_cpu_load_nohz().
         */
        this_rq->last_load_update_tick = jiffies;
        __update_cpu_load(this_rq, this_rq->load.weight, 1);

        calc_load_account_active(this_rq);
}
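
For readers following the fixed-point arithmetic above, the following standalone userspace sketch (not part of the patch) shows how calc_load() drives an avenrun entry toward the active task count. The FSHIFT, FIXED_1 and EXP_1 values mirror include/linux/sched.h; the rest is illustrative only.

/*
 * Standalone userspace sketch (not from this patch) of the fixed-point
 * load-average update.  Constants mirror include/linux/sched.h:
 * FSHIFT = 11 fractional bits, EXP_1 = 2048/exp(5s/1min), i.e. the
 * 1-minute decay factor for one 5-second LOAD_FREQ interval.
 */
#include <stdio.h>

#define FSHIFT   11                 /* nr of bits of precision */
#define FIXED_1  (1 << FSHIFT)      /* 1.0 as fixed-point */
#define EXP_1    1884               /* 1/exp(5sec/1min) as fixed-point */

/* a1 = a0 * e + a * (1 - e), rounded -- same shape as calc_load() above */
static unsigned long calc_load(unsigned long load, unsigned long exp,
                               unsigned long active)
{
        load *= exp;
        load += active * (FIXED_1 - exp);
        load += 1UL << (FSHIFT - 1);
        return load >> FSHIFT;
}

int main(void)
{
        /* 2 runnable tasks, scaled to fixed-point like calc_global_load() */
        unsigned long active = 2 * FIXED_1;
        unsigned long avenrun_1 = 0;    /* 1-minute average, starts at 0 */
        int i;

        /* one iteration per LOAD_FREQ (5 s); the average decays toward 2.0 */
        for (i = 1; i <= 24; i++) {     /* 24 * 5 s = 2 minutes */
                avenrun_1 = calc_load(avenrun_1, EXP_1, active);
                if (i % 6 == 0)         /* print every 30 s */
                        printf("t=%3ds  load1=%lu.%02lu\n", i * 5,
                               avenrun_1 >> FSHIFT,
                               (avenrun_1 & (FIXED_1 - 1)) * 100 / FIXED_1);
        }
        return 0;
}

With two runnable tasks held constant, the printed 1-minute figure climbs from 0.00 toward 2.00, mirroring the integer/fraction split that /proc/loadavg reports from avenrun[].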