diff options
Diffstat (limited to 'kernel/sched/loadavg.c')
-rw-r--r-- | kernel/sched/loadavg.c | 51 |
1 files changed, 26 insertions, 25 deletions
diff --git a/kernel/sched/loadavg.c b/kernel/sched/loadavg.c index f15fb2bdbc0d..f14716a3522f 100644 --- a/kernel/sched/loadavg.c +++ b/kernel/sched/loadavg.c | |||
@@ -117,7 +117,7 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active) | |||
117 | * load-average relies on per-cpu sampling from the tick, it is affected by | 117 | * load-average relies on per-cpu sampling from the tick, it is affected by |
118 | * NO_HZ. | 118 | * NO_HZ. |
119 | * | 119 | * |
120 | * The basic idea is to fold the nr_active delta into a global idle-delta upon | 120 | * The basic idea is to fold the nr_active delta into a global NO_HZ-delta upon |
121 | * entering NO_HZ state such that we can include this as an 'extra' cpu delta | 121 | * entering NO_HZ state such that we can include this as an 'extra' cpu delta |
122 | * when we read the global state. | 122 | * when we read the global state. |
123 | * | 123 | * |
@@ -126,7 +126,7 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active) | |||
126 | * - When we go NO_HZ idle during the window, we can negate our sample | 126 | * - When we go NO_HZ idle during the window, we can negate our sample |
127 | * contribution, causing under-accounting. | 127 | * contribution, causing under-accounting. |
128 | * | 128 | * |
129 | * We avoid this by keeping two idle-delta counters and flipping them | 129 | * We avoid this by keeping two NO_HZ-delta counters and flipping them |
130 | * when the window starts, thus separating old and new NO_HZ load. | 130 | * when the window starts, thus separating old and new NO_HZ load. |
131 | * | 131 | * |
132 | * The only trick is the slight shift in index flip for read vs write. | 132 | * The only trick is the slight shift in index flip for read vs write. |
@@ -137,22 +137,22 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active) | |||
137 | * r:0 0 1 1 0 0 1 1 0 | 137 | * r:0 0 1 1 0 0 1 1 0 |
138 | * w:0 1 1 0 0 1 1 0 0 | 138 | * w:0 1 1 0 0 1 1 0 0 |
139 | * | 139 | * |
140 | * This ensures we'll fold the old idle contribution in this window while | 140 | * This ensures we'll fold the old NO_HZ contribution in this window while |
141 | * accumulating the new one. | 141 | * accumulating the new one. |
142 | * | 142 | * |
143 | * - When we wake up from NO_HZ idle during the window, we push up our | 143 | * - When we wake up from NO_HZ during the window, we push up our |
144 | * contribution, since we effectively move our sample point to a known | 144 | * contribution, since we effectively move our sample point to a known |
145 | * busy state. | 145 | * busy state. |
146 | * | 146 | * |
147 | * This is solved by pushing the window forward, and thus skipping the | 147 | * This is solved by pushing the window forward, and thus skipping the |
148 | * sample, for this cpu (effectively using the idle-delta for this cpu which | 148 | * sample, for this cpu (effectively using the NO_HZ-delta for this cpu which |
149 | * was in effect at the time the window opened). This also solves the issue | 149 | * was in effect at the time the window opened). This also solves the issue |
150 | * of having to deal with a cpu having been in NOHZ idle for multiple | 150 | * of having to deal with a cpu having been in NO_HZ for multiple LOAD_FREQ |
151 | * LOAD_FREQ intervals. | 151 | * intervals. |
152 | * | 152 | * |
153 | * When making the ILB scale, we should try to pull this in as well. | 153 | * When making the ILB scale, we should try to pull this in as well. |
154 | */ | 154 | */ |
155 | static atomic_long_t calc_load_idle[2]; | 155 | static atomic_long_t calc_load_nohz[2]; |
156 | static int calc_load_idx; | 156 | static int calc_load_idx; |
157 | 157 | ||
158 | static inline int calc_load_write_idx(void) | 158 | static inline int calc_load_write_idx(void) |
@@ -167,7 +167,7 @@ static inline int calc_load_write_idx(void) | |||
167 | 167 | ||
168 | /* | 168 | /* |
169 | * If the folding window started, make sure we start writing in the | 169 | * If the folding window started, make sure we start writing in the |
170 | * next idle-delta. | 170 | * next NO_HZ-delta. |
171 | */ | 171 | */ |
172 | if (!time_before(jiffies, READ_ONCE(calc_load_update))) | 172 | if (!time_before(jiffies, READ_ONCE(calc_load_update))) |
173 | idx++; | 173 | idx++; |
@@ -180,24 +180,24 @@ static inline int calc_load_read_idx(void) | |||
180 | return calc_load_idx & 1; | 180 | return calc_load_idx & 1; |
181 | } | 181 | } |
182 | 182 | ||
183 | void calc_load_enter_idle(void) | 183 | void calc_load_nohz_start(void) |
184 | { | 184 | { |
185 | struct rq *this_rq = this_rq(); | 185 | struct rq *this_rq = this_rq(); |
186 | long delta; | 186 | long delta; |
187 | 187 | ||
188 | /* | 188 | /* |
189 | * We're going into NOHZ mode, if there's any pending delta, fold it | 189 | * We're going into NO_HZ mode, if there's any pending delta, fold it |
190 | * into the pending idle delta. | 190 | * into the pending NO_HZ delta. |
191 | */ | 191 | */ |
192 | delta = calc_load_fold_active(this_rq, 0); | 192 | delta = calc_load_fold_active(this_rq, 0); |
193 | if (delta) { | 193 | if (delta) { |
194 | int idx = calc_load_write_idx(); | 194 | int idx = calc_load_write_idx(); |
195 | 195 | ||
196 | atomic_long_add(delta, &calc_load_idle[idx]); | 196 | atomic_long_add(delta, &calc_load_nohz[idx]); |
197 | } | 197 | } |
198 | } | 198 | } |
199 | 199 | ||
200 | void calc_load_exit_idle(void) | 200 | void calc_load_nohz_stop(void) |
201 | { | 201 | { |
202 | struct rq *this_rq = this_rq(); | 202 | struct rq *this_rq = this_rq(); |
203 | 203 | ||
@@ -217,13 +217,13 @@ void calc_load_exit_idle(void) | |||
217 | this_rq->calc_load_update += LOAD_FREQ; | 217 | this_rq->calc_load_update += LOAD_FREQ; |
218 | } | 218 | } |
219 | 219 | ||
220 | static long calc_load_fold_idle(void) | 220 | static long calc_load_nohz_fold(void) |
221 | { | 221 | { |
222 | int idx = calc_load_read_idx(); | 222 | int idx = calc_load_read_idx(); |
223 | long delta = 0; | 223 | long delta = 0; |
224 | 224 | ||
225 | if (atomic_long_read(&calc_load_idle[idx])) | 225 | if (atomic_long_read(&calc_load_nohz[idx])) |
226 | delta = atomic_long_xchg(&calc_load_idle[idx], 0); | 226 | delta = atomic_long_xchg(&calc_load_nohz[idx], 0); |
227 | 227 | ||
228 | return delta; | 228 | return delta; |
229 | } | 229 | } |
@@ -299,9 +299,9 @@ calc_load_n(unsigned long load, unsigned long exp, | |||
299 | 299 | ||
300 | /* | 300 | /* |
301 | * NO_HZ can leave us missing all per-cpu ticks calling | 301 | * NO_HZ can leave us missing all per-cpu ticks calling |
302 | * calc_load_account_active(), but since an idle CPU folds its delta into | 302 | * calc_load_fold_active(), but since a NO_HZ CPU folds its delta into |
303 | * calc_load_tasks_idle per calc_load_account_idle(), all we need to do is fold | 303 | * calc_load_nohz per calc_load_nohz_start(), all we need to do is fold |
304 | * in the pending idle delta if our idle period crossed a load cycle boundary. | 304 | * in the pending NO_HZ delta if our NO_HZ period crossed a load cycle boundary. |
305 | * | 305 | * |
306 | * Once we've updated the global active value, we need to apply the exponential | 306 | * Once we've updated the global active value, we need to apply the exponential |
307 | * weights adjusted to the number of cycles missed. | 307 | * weights adjusted to the number of cycles missed. |
@@ -330,7 +330,7 @@ static void calc_global_nohz(void) | |||
330 | } | 330 | } |
331 | 331 | ||
332 | /* | 332 | /* |
333 | * Flip the idle index... | 333 | * Flip the NO_HZ index... |
334 | * | 334 | * |
335 | * Make sure we first write the new time then flip the index, so that | 335 | * Make sure we first write the new time then flip the index, so that |
336 | * calc_load_write_idx() will see the new time when it reads the new | 336 | * calc_load_write_idx() will see the new time when it reads the new |
@@ -341,7 +341,7 @@ static void calc_global_nohz(void) | |||
341 | } | 341 | } |
342 | #else /* !CONFIG_NO_HZ_COMMON */ | 342 | #else /* !CONFIG_NO_HZ_COMMON */ |
343 | 343 | ||
344 | static inline long calc_load_fold_idle(void) { return 0; } | 344 | static inline long calc_load_nohz_fold(void) { return 0; } |
345 | static inline void calc_global_nohz(void) { } | 345 | static inline void calc_global_nohz(void) { } |
346 | 346 | ||
347 | #endif /* CONFIG_NO_HZ_COMMON */ | 347 | #endif /* CONFIG_NO_HZ_COMMON */ |
@@ -362,9 +362,9 @@ void calc_global_load(unsigned long ticks) | |||
362 | return; | 362 | return; |
363 | 363 | ||
364 | /* | 364 | /* |
365 | * Fold the 'old' idle-delta to include all NO_HZ cpus. | 365 | * Fold the 'old' NO_HZ-delta to include all NO_HZ cpus. |
366 | */ | 366 | */ |
367 | delta = calc_load_fold_idle(); | 367 | delta = calc_load_nohz_fold(); |
368 | if (delta) | 368 | if (delta) |
369 | atomic_long_add(delta, &calc_load_tasks); | 369 | atomic_long_add(delta, &calc_load_tasks); |
370 | 370 | ||
@@ -378,7 +378,8 @@ void calc_global_load(unsigned long ticks) | |||
378 | WRITE_ONCE(calc_load_update, sample_window + LOAD_FREQ); | 378 | WRITE_ONCE(calc_load_update, sample_window + LOAD_FREQ); |
379 | 379 | ||
380 | /* | 380 | /* |
381 | * In case we idled for multiple LOAD_FREQ intervals, catch up in bulk. | 381 | * In case we went to NO_HZ for multiple LOAD_FREQ intervals, |
382 | * catch up in bulk. | ||
382 | */ | 383 | */ |
383 | calc_global_nohz(); | 384 | calc_global_nohz(); |
384 | } | 385 | } |