Diffstat (limited to 'kernel/sched/loadavg.c')
 kernel/sched/loadavg.c | 51 ++++++++++++++++++++++++++-------------------------
 1 file changed, 26 insertions(+), 25 deletions(-)
diff --git a/kernel/sched/loadavg.c b/kernel/sched/loadavg.c
index f15fb2bdbc0d..f14716a3522f 100644
--- a/kernel/sched/loadavg.c
+++ b/kernel/sched/loadavg.c
@@ -117,7 +117,7 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active)
  * load-average relies on per-cpu sampling from the tick, it is affected by
  * NO_HZ.
  *
- * The basic idea is to fold the nr_active delta into a global idle-delta upon
+ * The basic idea is to fold the nr_active delta into a global NO_HZ-delta upon
  * entering NO_HZ state such that we can include this as an 'extra' cpu delta
  * when we read the global state.
  *
@@ -126,7 +126,7 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active)
  *  - When we go NO_HZ idle during the window, we can negate our sample
  *    contribution, causing under-accounting.
  *
- *    We avoid this by keeping two idle-delta counters and flipping them
+ *    We avoid this by keeping two NO_HZ-delta counters and flipping them
  *    when the window starts, thus separating old and new NO_HZ load.
  *
  *    The only trick is the slight shift in index flip for read vs write.
@@ -137,22 +137,22 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active)
  *    r:0 0 1           1 0           0 1           1 0
  *    w:0 1 1           0 0           1 1           0 0
  *
- *    This ensures we'll fold the old idle contribution in this window while
+ *    This ensures we'll fold the old NO_HZ contribution in this window while
  *    accumlating the new one.
  *
- *  - When we wake up from NO_HZ idle during the window, we push up our
+ *  - When we wake up from NO_HZ during the window, we push up our
  *    contribution, since we effectively move our sample point to a known
  *    busy state.
  *
  *    This is solved by pushing the window forward, and thus skipping the
- *    sample, for this cpu (effectively using the idle-delta for this cpu which
+ *    sample, for this cpu (effectively using the NO_HZ-delta for this cpu which
  *    was in effect at the time the window opened). This also solves the issue
- *    of having to deal with a cpu having been in NOHZ idle for multiple
- *    LOAD_FREQ intervals.
+ *    of having to deal with a cpu having been in NO_HZ for multiple LOAD_FREQ
+ *    intervals.
  *
  * When making the ILB scale, we should try to pull this in as well.
  */
-static atomic_long_t calc_load_idle[2];
+static atomic_long_t calc_load_nohz[2];
 static int calc_load_idx;
 
 static inline int calc_load_write_idx(void)
@@ -167,7 +167,7 @@ static inline int calc_load_write_idx(void)
 
         /*
          * If the folding window started, make sure we start writing in the
-         * next idle-delta.
+         * next NO_HZ-delta.
          */
         if (!time_before(jiffies, READ_ONCE(calc_load_update)))
                 idx++;
@@ -180,24 +180,24 @@ static inline int calc_load_read_idx(void)
         return calc_load_idx & 1;
 }
 
-void calc_load_enter_idle(void)
+void calc_load_nohz_start(void)
 {
         struct rq *this_rq = this_rq();
         long delta;
 
         /*
-         * We're going into NOHZ mode, if there's any pending delta, fold it
-         * into the pending idle delta.
+         * We're going into NO_HZ mode, if there's any pending delta, fold it
+         * into the pending NO_HZ delta.
          */
         delta = calc_load_fold_active(this_rq, 0);
         if (delta) {
                 int idx = calc_load_write_idx();
 
-                atomic_long_add(delta, &calc_load_idle[idx]);
+                atomic_long_add(delta, &calc_load_nohz[idx]);
         }
 }
 
-void calc_load_exit_idle(void)
+void calc_load_nohz_stop(void)
 {
         struct rq *this_rq = this_rq();
 
@@ -217,13 +217,13 @@ void calc_load_exit_idle(void)
         this_rq->calc_load_update += LOAD_FREQ;
 }
 
-static long calc_load_fold_idle(void)
+static long calc_load_nohz_fold(void)
 {
         int idx = calc_load_read_idx();
         long delta = 0;
 
-        if (atomic_long_read(&calc_load_idle[idx]))
-                delta = atomic_long_xchg(&calc_load_idle[idx], 0);
+        if (atomic_long_read(&calc_load_nohz[idx]))
+                delta = atomic_long_xchg(&calc_load_nohz[idx], 0);
 
         return delta;
 }
@@ -299,9 +299,9 @@ calc_load_n(unsigned long load, unsigned long exp,
 
 /*
  * NO_HZ can leave us missing all per-cpu ticks calling
- * calc_load_account_active(), but since an idle CPU folds its delta into
- * calc_load_tasks_idle per calc_load_account_idle(), all we need to do is fold
- * in the pending idle delta if our idle period crossed a load cycle boundary.
+ * calc_load_fold_active(), but since a NO_HZ CPU folds its delta into
+ * calc_load_nohz per calc_load_nohz_start(), all we need to do is fold
+ * in the pending NO_HZ delta if our NO_HZ period crossed a load cycle boundary.
  *
  * Once we've updated the global active value, we need to apply the exponential
  * weights adjusted to the number of cycles missed.
@@ -330,7 +330,7 @@ static void calc_global_nohz(void)
         }
 
         /*
-         * Flip the idle index...
+         * Flip the NO_HZ index...
          *
          * Make sure we first write the new time then flip the index, so that
          * calc_load_write_idx() will see the new time when it reads the new
@@ -341,7 +341,7 @@ static void calc_global_nohz(void)
 }
 #else /* !CONFIG_NO_HZ_COMMON */
 
-static inline long calc_load_fold_idle(void) { return 0; }
+static inline long calc_load_nohz_fold(void) { return 0; }
 static inline void calc_global_nohz(void) { }
 
 #endif /* CONFIG_NO_HZ_COMMON */
@@ -362,9 +362,9 @@ void calc_global_load(unsigned long ticks)
                 return;
 
         /*
-         * Fold the 'old' idle-delta to include all NO_HZ cpus.
+         * Fold the 'old' NO_HZ-delta to include all NO_HZ cpus.
          */
-        delta = calc_load_fold_idle();
+        delta = calc_load_nohz_fold();
         if (delta)
                 atomic_long_add(delta, &calc_load_tasks);
 
@@ -378,7 +378,8 @@ void calc_global_load(unsigned long ticks)
         WRITE_ONCE(calc_load_update, sample_window + LOAD_FREQ);
 
         /*
-         * In case we idled for multiple LOAD_FREQ intervals, catch up in bulk.
+         * In case we went to NO_HZ for multiple LOAD_FREQ intervals
+         * catch up in bulk.
          */
         calc_global_nohz();
 }
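
The mechanism this patch renames is easiest to see in isolation. Below is a stand-alone,
user-space sketch of the two-bucket NO_HZ-delta scheme described in the comment block
above: a CPU that goes NO_HZ after the sample window has opened parks its delta in the
"next" bucket, while the global reader drains the "old" bucket once per window. All of
the demo_* names, the fake tick counter and the single-threaded main() are assumptions
made purely for illustration; the real code is the kernel's calc_load_nohz[] /
calc_load_idx machinery shown in the diff, which additionally tracks per-runqueue state.

/*
 * User-space sketch of the two NO_HZ-delta buckets and the read/write
 * index shift. Illustrative only; not the kernel implementation.
 */
#include <stdatomic.h>
#include <stdio.h>

#define DEMO_LOAD_FREQ 5                /* pretend ticks per sample window */

static atomic_long demo_nohz[2];        /* the two NO_HZ-delta buckets */
static int demo_idx;                    /* read index, flipped once per window */
static unsigned long demo_update;       /* start of the next sample window */
static unsigned long demo_ticks;        /* stand-in for jiffies */

/* Writers move to the next bucket as soon as the window has opened... */
static int demo_write_idx(void)
{
        int idx = demo_idx;

        if (demo_ticks >= demo_update)  /* window already started */
                idx++;
        return idx & 1;
}

/* ...while the reader keeps draining the bucket of the window that is closing. */
static int demo_read_idx(void)
{
        return demo_idx & 1;
}

/* A CPU entering NO_HZ folds its pending delta into the current write bucket. */
static void demo_nohz_start(long pending_delta)
{
        if (pending_delta)
                atomic_fetch_add(&demo_nohz[demo_write_idx()], pending_delta);
}

/* The global update folds the 'old' bucket exactly once per window. */
static long demo_nohz_fold(void)
{
        return atomic_exchange(&demo_nohz[demo_read_idx()], 0);
}

/* Window rollover: publish the new deadline, then flip the read index. */
static void demo_new_window(void)
{
        demo_update += DEMO_LOAD_FREQ;
        demo_idx++;
}

int main(void)
{
        demo_update = DEMO_LOAD_FREQ;

        demo_nohz_start(3);             /* goes NO_HZ inside the current window */
        demo_ticks = DEMO_LOAD_FREQ;    /* window boundary reached */
        demo_nohz_start(2);             /* late sleeper lands in the *next* bucket */

        printf("folded now:  %ld\n", demo_nohz_fold());  /* 3: old window only */
        demo_new_window();
        printf("folded next: %ld\n", demo_nohz_fold());  /* 2: picked up one window later */
        return 0;
}

The read/write index shift is the whole trick: demo_write_idx() advances as soon as the
window deadline has passed, while demo_read_idx() only advances when the window is
actually processed. In the kernel the rollover must also order the new window time
against the index flip across CPUs, which is what the "Make sure we first write the new
time then flip the index" comment in calc_global_nohz() refers to.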