Diffstat (limited to 'kernel/hrtimer.c')
 kernel/hrtimer.c | 121
 1 file changed, 64 insertions(+), 57 deletions(-)
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 126b9808f287..49da79ab8486 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -191,6 +191,46 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
 	}
 }
 
+
+/*
+ * Get the preferred target CPU for NOHZ
+ */
+static int hrtimer_get_target(int this_cpu, int pinned)
+{
+#ifdef CONFIG_NO_HZ
+	if (!pinned && get_sysctl_timer_migration() && idle_cpu(this_cpu)) {
+		int preferred_cpu = get_nohz_load_balancer();
+
+		if (preferred_cpu >= 0)
+			return preferred_cpu;
+	}
+#endif
+	return this_cpu;
+}
+
+/*
+ * With HIGHRES=y we do not migrate the timer when it is expiring
+ * before the next event on the target cpu because we cannot reprogram
+ * the target cpu hardware and we would cause it to fire late.
+ *
+ * Called with cpu_base->lock of target cpu held.
+ */
+static int
+hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base)
+{
+#ifdef CONFIG_HIGH_RES_TIMERS
+	ktime_t expires;
+
+	if (!new_base->cpu_base->hres_active)
+		return 0;
+
+	expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset);
+	return expires.tv64 <= new_base->cpu_base->expires_next.tv64;
+#else
+	return 0;
+#endif
+}
+
 /*
  * Switch the timer base to the current CPU when possible.
  */
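
Taken together, the two helpers encode the migration policy that the switch_hrtimer_base() hunks below install: pick a target CPU, then refuse the move when the timer would expire at or before the target's next programmed event. A minimal standalone sketch of that expiry test (cpu_model, keep_local, and the sample numbers are invented for illustration; only the comparison itself mirrors hrtimer_check_target()):

#include <stdint.h>
#include <stdio.h>

/* Stand-in for the target CPU's hrtimer_cpu_base (illustrative only). */
struct cpu_model {
	int64_t expires_next;	/* next programmed event, INT64_MAX if none */
};

/*
 * Same test as hrtimer_check_target(): a timer expiring at or before
 * the target's next event must stay local, because the target's clock
 * event hardware cannot be reprogrammed remotely and the timer would
 * fire late.
 */
static int keep_local(int64_t timer_expires, const struct cpu_model *target)
{
	return timer_expires <= target->expires_next;
}

int main(void)
{
	struct cpu_model target = { .expires_next = 1000 };

	printf("expires=1500: %s\n", keep_local(1500, &target) ? "keep local" : "migrate");
	printf("expires= 800: %s\n", keep_local(800, &target) ? "keep local" : "migrate");
	return 0;
}
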
@@ -200,27 +240,8 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
 {
 	struct hrtimer_clock_base *new_base;
 	struct hrtimer_cpu_base *new_cpu_base;
-	int cpu, preferred_cpu = -1;
-
-	cpu = smp_processor_id();
-#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
-	if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
-		preferred_cpu = get_nohz_load_balancer();
-		if (preferred_cpu >= 0) {
-			/*
-			 * We must not check the expiry value when
-			 * preferred_cpu is the current cpu. If base
-			 * != new_base we would loop forever when the
-			 * timer expires before the current programmed
-			 * next timer event.
-			 */
-			if (preferred_cpu != cpu)
-				cpu = preferred_cpu;
-			else
-				preferred_cpu = -1;
-		}
-	}
-#endif
+	int this_cpu = smp_processor_id();
+	int cpu = hrtimer_get_target(this_cpu, pinned);
 
 again:
 	new_cpu_base = &per_cpu(hrtimer_bases, cpu);
@@ -228,7 +249,7 @@ again:
 
 	if (base != new_base) {
 		/*
-		 * We are trying to schedule the timer on the local CPU.
+		 * We are trying to move timer to new_base.
 		 * However we can't change timer's base while it is running,
 		 * so we keep it on the same CPU. No hassle vs. reprogramming
 		 * the event source in the high resolution case. The softirq
@@ -244,38 +265,12 @@ again:
 		spin_unlock(&base->cpu_base->lock);
 		spin_lock(&new_base->cpu_base->lock);
 
-		/* Optimized away for NOHZ=n SMP=n */
-		if (cpu == preferred_cpu) {
-			/* Calculate clock monotonic expiry time */
-#ifdef CONFIG_HIGH_RES_TIMERS
-			ktime_t expires = ktime_sub(hrtimer_get_expires(timer),
-							new_base->offset);
-#else
-			ktime_t expires = hrtimer_get_expires(timer);
-#endif
-
-			/*
-			 * Get the next event on target cpu from the
-			 * clock events layer.
-			 * This covers the highres=off nohz=on case as well.
-			 */
-			ktime_t next = clockevents_get_next_event(cpu);
-
-			ktime_t delta = ktime_sub(expires, next);
-
-			/*
-			 * We do not migrate the timer when it is expiring
-			 * before the next event on the target cpu because
-			 * we cannot reprogram the target cpu hardware and
-			 * we would cause it to fire late.
-			 */
-			if (delta.tv64 < 0) {
-				cpu = smp_processor_id();
-				spin_unlock(&new_base->cpu_base->lock);
-				spin_lock(&base->cpu_base->lock);
-				timer->base = base;
-				goto again;
-			}
+		if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) {
+			cpu = this_cpu;
+			spin_unlock(&new_base->cpu_base->lock);
+			spin_lock(&base->cpu_base->lock);
+			timer->base = base;
+			goto again;
 		}
 		timer->base = new_base;
 	}
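
The cpu != this_cpu guard in the rewritten check preserves the rule spelled out in the comment deleted from the old code: the expiry check must not run when the chosen target is the current CPU. Otherwise a timer being moved toward this_cpu that expires before the locally programmed event would fail hrtimer_check_target(), fall back to this_cpu (the same target), and spin on the again: label forever.
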
@@ -1287,14 +1282,22 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 
 	expires_next.tv64 = KTIME_MAX;
 
+	spin_lock(&cpu_base->lock);
+	/*
+	 * We set expires_next to KTIME_MAX here with cpu_base->lock
+	 * held to prevent that a timer is enqueued in our queue via
+	 * the migration code. This does not affect enqueueing of
+	 * timers which run their callback and need to be requeued on
+	 * this CPU.
+	 */
+	cpu_base->expires_next.tv64 = KTIME_MAX;
+
 	base = cpu_base->clock_base;
 
 	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
 		ktime_t basenow;
 		struct rb_node *node;
 
-		spin_lock(&cpu_base->lock);
-
 		basenow = ktime_add(now, base->offset);
 
 		while ((node = base->first)) {
@@ -1327,11 +1330,15 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 
 			__run_hrtimer(timer);
 		}
-		spin_unlock(&cpu_base->lock);
 		base++;
 	}
 
+	/*
+	 * Store the new expiry value so the migration code can verify
+	 * against it.
+	 */
 	cpu_base->expires_next = expires_next;
+	spin_unlock(&cpu_base->lock);
 
 	/* Reprogramming necessary ? */
 	if (expires_next.tv64 != KTIME_MAX) {
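
The two hrtimer_interrupt() hunks widen the cpu_base->lock section so the migration path can never observe expires_next in a stale intermediate state: KTIME_MAX is published before the expired timers run, and the recomputed value is stored before the lock drops. A toy userspace model of this handshake, assuming POSIX threads; cpu_base_model, interrupt_side, and migration_side are invented names and the numeric expiry values are arbitrary:

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

#define KTIME_MAX INT64_MAX

/* Stand-in for hrtimer_cpu_base: one lock guarding expires_next. */
struct cpu_base_model {
	pthread_mutex_t lock;
	int64_t expires_next;
};

static struct cpu_base_model target = {
	PTHREAD_MUTEX_INITIALIZER,
	500,	/* next event currently programmed on the target CPU */
};

/* Models the reworked hrtimer_interrupt() on the target CPU. */
static void *interrupt_side(void *unused)
{
	(void)unused;
	pthread_mutex_lock(&target.lock);
	/* Publish KTIME_MAX first: migrators now keep their timers local. */
	target.expires_next = KTIME_MAX;
	/* ... run expired timers, compute the new soonest expiry ... */
	target.expires_next = 2000;
	pthread_mutex_unlock(&target.lock);
	return NULL;
}

/* Models switch_hrtimer_base() on another CPU deciding whether to migrate. */
static void *migration_side(void *unused)
{
	int64_t timer_expires = 1500;

	(void)unused;
	pthread_mutex_lock(&target.lock);	/* the target CPU's base lock */
	/* The hrtimer_check_target() test, against a value guaranteed to be
	 * either KTIME_MAX or a fully recomputed expiry. */
	puts(timer_expires <= target.expires_next ?
	     "keep timer local" : "migrate timer");
	pthread_mutex_unlock(&target.lock);
	return NULL;
}

int main(void)
{
	pthread_t irq, mig;

	pthread_create(&irq, NULL, interrupt_side, NULL);
	pthread_create(&mig, NULL, migration_side, NULL);
	pthread_join(irq, NULL);
	pthread_join(mig, NULL);
	return 0;
}

Both orderings of the two threads are valid; what the protocol rules out is the window where expired timers are being run while a remote CPU still trusts the old expires_next and enqueues a timer the target cannot reprogram for.
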