diff options
Diffstat (limited to 'kernel/hrtimer.c')
-rw-r--r-- | kernel/hrtimer.c | 121 |
1 files changed, 64 insertions, 57 deletions
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 126b9808f287..49da79ab8486 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c | |||
@@ -191,6 +191,46 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer, | |||
191 | } | 191 | } |
192 | } | 192 | } |
193 | 193 | ||
194 | |||
195 | /* | ||
196 | * Get the preferred target CPU for NOHZ | ||
197 | */ | ||
198 | static int hrtimer_get_target(int this_cpu, int pinned) | ||
199 | { | ||
200 | #ifdef CONFIG_NO_HZ | ||
201 | if (!pinned && get_sysctl_timer_migration() && idle_cpu(this_cpu)) { | ||
202 | int preferred_cpu = get_nohz_load_balancer(); | ||
203 | |||
204 | if (preferred_cpu >= 0) | ||
205 | return preferred_cpu; | ||
206 | } | ||
207 | #endif | ||
208 | return this_cpu; | ||
209 | } | ||
210 | |||
211 | /* | ||
212 | * With HIGHRES=y we do not migrate the timer when it is expiring | ||
213 | * before the next event on the target cpu because we cannot reprogram | ||
214 | * the target cpu hardware and we would cause it to fire late. | ||
215 | * | ||
216 | * Called with cpu_base->lock of target cpu held. | ||
217 | */ | ||
218 | static int | ||
219 | hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base) | ||
220 | { | ||
221 | #ifdef CONFIG_HIGH_RES_TIMERS | ||
222 | ktime_t expires; | ||
223 | |||
224 | if (!new_base->cpu_base->hres_active) | ||
225 | return 0; | ||
226 | |||
227 | expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset); | ||
228 | return expires.tv64 <= new_base->cpu_base->expires_next.tv64; | ||
229 | #else | ||
230 | return 0; | ||
231 | #endif | ||
232 | } | ||
233 | |||
194 | /* | 234 | /* |
195 | * Switch the timer base to the current CPU when possible. | 235 | * Switch the timer base to the current CPU when possible. |
196 | */ | 236 | */ |
@@ -200,27 +240,8 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base, | |||
200 | { | 240 | { |
201 | struct hrtimer_clock_base *new_base; | 241 | struct hrtimer_clock_base *new_base; |
202 | struct hrtimer_cpu_base *new_cpu_base; | 242 | struct hrtimer_cpu_base *new_cpu_base; |
203 | int cpu, preferred_cpu = -1; | 243 | int this_cpu = smp_processor_id(); |
204 | 244 | int cpu = hrtimer_get_target(this_cpu, pinned); | |
205 | cpu = smp_processor_id(); | ||
206 | #if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP) | ||
207 | if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) { | ||
208 | preferred_cpu = get_nohz_load_balancer(); | ||
209 | if (preferred_cpu >= 0) { | ||
210 | /* | ||
211 | * We must not check the expiry value when | ||
212 | * preferred_cpu is the current cpu. If base | ||
213 | * != new_base we would loop forever when the | ||
214 | * timer expires before the current programmed | ||
215 | * next timer event. | ||
216 | */ | ||
217 | if (preferred_cpu != cpu) | ||
218 | cpu = preferred_cpu; | ||
219 | else | ||
220 | preferred_cpu = -1; | ||
221 | } | ||
222 | } | ||
223 | #endif | ||
224 | 245 | ||
225 | again: | 246 | again: |
226 | new_cpu_base = &per_cpu(hrtimer_bases, cpu); | 247 | new_cpu_base = &per_cpu(hrtimer_bases, cpu); |
@@ -228,7 +249,7 @@ again: | |||
228 | 249 | ||
229 | if (base != new_base) { | 250 | if (base != new_base) { |
230 | /* | 251 | /* |
231 | * We are trying to schedule the timer on the local CPU. | 252 | * We are trying to move timer to new_base. |
232 | * However we can't change timer's base while it is running, | 253 | * However we can't change timer's base while it is running, |
233 | * so we keep it on the same CPU. No hassle vs. reprogramming | 254 | * so we keep it on the same CPU. No hassle vs. reprogramming |
234 | * the event source in the high resolution case. The softirq | 255 | * the event source in the high resolution case. The softirq |
@@ -244,38 +265,12 @@ again: | |||
244 | spin_unlock(&base->cpu_base->lock); | 265 | spin_unlock(&base->cpu_base->lock); |
245 | spin_lock(&new_base->cpu_base->lock); | 266 | spin_lock(&new_base->cpu_base->lock); |
246 | 267 | ||
247 | /* Optimized away for NOHZ=n SMP=n */ | 268 | if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) { |
248 | if (cpu == preferred_cpu) { | 269 | cpu = this_cpu; |
249 | /* Calculate clock monotonic expiry time */ | 270 | spin_unlock(&new_base->cpu_base->lock); |
250 | #ifdef CONFIG_HIGH_RES_TIMERS | 271 | spin_lock(&base->cpu_base->lock); |
251 | ktime_t expires = ktime_sub(hrtimer_get_expires(timer), | 272 | timer->base = base; |
252 | new_base->offset); | 273 | goto again; |
253 | #else | ||
254 | ktime_t expires = hrtimer_get_expires(timer); | ||
255 | #endif | ||
256 | |||
257 | /* | ||
258 | * Get the next event on target cpu from the | ||
259 | * clock events layer. | ||
260 | * This covers the highres=off nohz=on case as well. | ||
261 | */ | ||
262 | ktime_t next = clockevents_get_next_event(cpu); | ||
263 | |||
264 | ktime_t delta = ktime_sub(expires, next); | ||
265 | |||
266 | /* | ||
267 | * We do not migrate the timer when it is expiring | ||
268 | * before the next event on the target cpu because | ||
269 | * we cannot reprogram the target cpu hardware and | ||
270 | * we would cause it to fire late. | ||
271 | */ | ||
272 | if (delta.tv64 < 0) { | ||
273 | cpu = smp_processor_id(); | ||
274 | spin_unlock(&new_base->cpu_base->lock); | ||
275 | spin_lock(&base->cpu_base->lock); | ||
276 | timer->base = base; | ||
277 | goto again; | ||
278 | } | ||
279 | } | 274 | } |
280 | timer->base = new_base; | 275 | timer->base = new_base; |
281 | } | 276 | } |
@@ -1287,14 +1282,22 @@ void hrtimer_interrupt(struct clock_event_device *dev) | |||
1287 | 1282 | ||
1288 | expires_next.tv64 = KTIME_MAX; | 1283 | expires_next.tv64 = KTIME_MAX; |
1289 | 1284 | ||
1285 | spin_lock(&cpu_base->lock); | ||
1286 | /* | ||
1287 | * We set expires_next to KTIME_MAX here with cpu_base->lock | ||
1288 | * held to prevent that a timer is enqueued in our queue via | ||
1289 | * the migration code. This does not affect enqueueing of | ||
1290 | * timers which run their callback and need to be requeued on | ||
1291 | * this CPU. | ||
1292 | */ | ||
1293 | cpu_base->expires_next.tv64 = KTIME_MAX; | ||
1294 | |||
1290 | base = cpu_base->clock_base; | 1295 | base = cpu_base->clock_base; |
1291 | 1296 | ||
1292 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { | 1297 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { |
1293 | ktime_t basenow; | 1298 | ktime_t basenow; |
1294 | struct rb_node *node; | 1299 | struct rb_node *node; |
1295 | 1300 | ||
1296 | spin_lock(&cpu_base->lock); | ||
1297 | |||
1298 | basenow = ktime_add(now, base->offset); | 1301 | basenow = ktime_add(now, base->offset); |
1299 | 1302 | ||
1300 | while ((node = base->first)) { | 1303 | while ((node = base->first)) { |
@@ -1327,11 +1330,15 @@ void hrtimer_interrupt(struct clock_event_device *dev) | |||
1327 | 1330 | ||
1328 | __run_hrtimer(timer); | 1331 | __run_hrtimer(timer); |
1329 | } | 1332 | } |
1330 | spin_unlock(&cpu_base->lock); | ||
1331 | base++; | 1333 | base++; |
1332 | } | 1334 | } |
1333 | 1335 | ||
1336 | /* | ||
1337 | * Store the new expiry value so the migration code can verify | ||
1338 | * against it. | ||
1339 | */ | ||
1334 | cpu_base->expires_next = expires_next; | 1340 | cpu_base->expires_next = expires_next; |
1341 | spin_unlock(&cpu_base->lock); | ||
1335 | 1342 | ||
1336 | /* Reprogramming necessary ? */ | 1343 | /* Reprogramming necessary ? */ |
1337 | if (expires_next.tv64 != KTIME_MAX) { | 1344 | if (expires_next.tv64 != KTIME_MAX) { |