Diffstat (limited to 'kernel/hrtimer.c')
-rw-r--r--	kernel/hrtimer.c	86
1 files changed, 77 insertions, 9 deletions
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index cb8a15c19583..49da79ab8486 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -43,6 +43,8 @@
 #include <linux/seq_file.h>
 #include <linux/err.h>
 #include <linux/debugobjects.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
 
 #include <asm/uaccess.h>
 
@@ -189,21 +191,65 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
 	}
 }
 
+
+/*
+ * Get the preferred target CPU for NOHZ
+ */
+static int hrtimer_get_target(int this_cpu, int pinned)
+{
+#ifdef CONFIG_NO_HZ
+	if (!pinned && get_sysctl_timer_migration() && idle_cpu(this_cpu)) {
+		int preferred_cpu = get_nohz_load_balancer();
+
+		if (preferred_cpu >= 0)
+			return preferred_cpu;
+	}
+#endif
+	return this_cpu;
+}
+
+/*
+ * With HIGHRES=y we do not migrate the timer when it is expiring
+ * before the next event on the target cpu because we cannot reprogram
+ * the target cpu hardware and we would cause it to fire late.
+ *
+ * Called with cpu_base->lock of target cpu held.
+ */
+static int
+hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base)
+{
+#ifdef CONFIG_HIGH_RES_TIMERS
+	ktime_t expires;
+
+	if (!new_base->cpu_base->hres_active)
+		return 0;
+
+	expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset);
+	return expires.tv64 <= new_base->cpu_base->expires_next.tv64;
+#else
+	return 0;
+#endif
+}
+
 /*
  * Switch the timer base to the current CPU when possible.
  */
 static inline struct hrtimer_clock_base *
-switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base)
+switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
+		    int pinned)
 {
 	struct hrtimer_clock_base *new_base;
 	struct hrtimer_cpu_base *new_cpu_base;
+	int this_cpu = smp_processor_id();
+	int cpu = hrtimer_get_target(this_cpu, pinned);
 
-	new_cpu_base = &__get_cpu_var(hrtimer_bases);
+again:
+	new_cpu_base = &per_cpu(hrtimer_bases, cpu);
 	new_base = &new_cpu_base->clock_base[base->index];
 
 	if (base != new_base) {
 		/*
-		 * We are trying to schedule the timer on the local CPU.
+		 * We are trying to move timer to new_base.
 		 * However we can't change timer's base while it is running,
 		 * so we keep it on the same CPU. No hassle vs. reprogramming
 		 * the event source in the high resolution case. The softirq
@@ -218,6 +264,14 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base)
 		timer->base = NULL;
 		spin_unlock(&base->cpu_base->lock);
 		spin_lock(&new_base->cpu_base->lock);
+
+		if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) {
+			cpu = this_cpu;
+			spin_unlock(&new_base->cpu_base->lock);
+			spin_lock(&base->cpu_base->lock);
+			timer->base = base;
+			goto again;
+		}
 		timer->base = new_base;
 	}
 	return new_base;
@@ -235,7 +289,7 @@ lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
 	return base;
 }
 
-# define switch_hrtimer_base(t, b)	(b)
+# define switch_hrtimer_base(t, b, p)	(b)
 
 #endif /* !CONFIG_SMP */
 
@@ -332,6 +386,8 @@ ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs)
 	return res;
 }
 
+EXPORT_SYMBOL_GPL(ktime_add_safe);
+
 #ifdef CONFIG_DEBUG_OBJECTS_TIMERS
 
 static struct debug_obj_descr hrtimer_debug_descr;
@@ -907,9 +963,9 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
 	ret = remove_hrtimer(timer, base);
 
 	/* Switch the timer base, if necessary: */
-	new_base = switch_hrtimer_base(timer, base);
+	new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED);
 
-	if (mode == HRTIMER_MODE_REL) {
+	if (mode & HRTIMER_MODE_REL) {
 		tim = ktime_add_safe(tim, new_base->get_time());
 		/*
 		 * CONFIG_TIME_LOW_RES is a temporary way for architectures
@@ -1226,14 +1282,22 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 
 	expires_next.tv64 = KTIME_MAX;
 
+	spin_lock(&cpu_base->lock);
+	/*
+	 * We set expires_next to KTIME_MAX here with cpu_base->lock
+	 * held to prevent that a timer is enqueued in our queue via
+	 * the migration code. This does not affect enqueueing of
+	 * timers which run their callback and need to be requeued on
+	 * this CPU.
+	 */
+	cpu_base->expires_next.tv64 = KTIME_MAX;
+
 	base = cpu_base->clock_base;
 
 	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
 		ktime_t basenow;
 		struct rb_node *node;
 
-		spin_lock(&cpu_base->lock);
-
 		basenow = ktime_add(now, base->offset);
 
 		while ((node = base->first)) {
@@ -1266,11 +1330,15 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 
 			__run_hrtimer(timer);
 		}
-		spin_unlock(&cpu_base->lock);
 		base++;
 	}
 
+	/*
+	 * Store the new expiry value so the migration code can verify
+	 * against it.
+	 */
 	cpu_base->expires_next = expires_next;
+	spin_unlock(&cpu_base->lock);
 
 	/* Reprogramming necessary ? */
 	if (expires_next.tv64 != KTIME_MAX) {
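
Usage note (not part of the diff above): with this change a caller keeps a timer on the current CPU by setting HRTIMER_MODE_PINNED in the mode argument; an unpinned relative timer armed on an idle CPU may instead be placed on the CPU returned by hrtimer_get_target(). A minimal sketch, assuming a hypothetical my_timer that has already been set up with hrtimer_init() and given a callback:

	/*
	 * Illustration only -- my_timer and the 100ms interval are made up.
	 * With HRTIMER_MODE_PINNED set, hrtimer_get_target() returns the
	 * current CPU, so switch_hrtimer_base() never picks the NOHZ
	 * load-balancer CPU. Dropping the flag allows migration when this
	 * CPU is idle and timer migration is enabled via sysctl.
	 */
	hrtimer_start(&my_timer, ktime_set(0, 100 * NSEC_PER_MSEC),
		      HRTIMER_MODE_REL | HRTIMER_MODE_PINNED);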