diff options
Diffstat (limited to 'kernel/hrtimer.c')
-rw-r--r-- | kernel/hrtimer.c | 110 |
1 files changed, 64 insertions, 46 deletions
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 9002958a96e7..49da79ab8486 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c | |||
@@ -191,6 +191,46 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer, | |||
191 | } | 191 | } |
192 | } | 192 | } |
193 | 193 | ||
194 | |||
195 | /* | ||
196 | * Get the preferred target CPU for NOHZ | ||
197 | */ | ||
198 | static int hrtimer_get_target(int this_cpu, int pinned) | ||
199 | { | ||
200 | #ifdef CONFIG_NO_HZ | ||
201 | if (!pinned && get_sysctl_timer_migration() && idle_cpu(this_cpu)) { | ||
202 | int preferred_cpu = get_nohz_load_balancer(); | ||
203 | |||
204 | if (preferred_cpu >= 0) | ||
205 | return preferred_cpu; | ||
206 | } | ||
207 | #endif | ||
208 | return this_cpu; | ||
209 | } | ||
210 | |||
211 | /* | ||
212 | * With HIGHRES=y we do not migrate the timer when it is expiring | ||
213 | * before the next event on the target cpu because we cannot reprogram | ||
214 | * the target cpu hardware and we would cause it to fire late. | ||
215 | * | ||
216 | * Called with cpu_base->lock of target cpu held. | ||
217 | */ | ||
218 | static int | ||
219 | hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base) | ||
220 | { | ||
221 | #ifdef CONFIG_HIGH_RES_TIMERS | ||
222 | ktime_t expires; | ||
223 | |||
224 | if (!new_base->cpu_base->hres_active) | ||
225 | return 0; | ||
226 | |||
227 | expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset); | ||
228 | return expires.tv64 <= new_base->cpu_base->expires_next.tv64; | ||
229 | #else | ||
230 | return 0; | ||
231 | #endif | ||
232 | } | ||
233 | |||
194 | /* | 234 | /* |
195 | * Switch the timer base to the current CPU when possible. | 235 | * Switch the timer base to the current CPU when possible. |
196 | */ | 236 | */ |
@@ -200,16 +240,8 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base, | |||
200 | { | 240 | { |
201 | struct hrtimer_clock_base *new_base; | 241 | struct hrtimer_clock_base *new_base; |
202 | struct hrtimer_cpu_base *new_cpu_base; | 242 | struct hrtimer_cpu_base *new_cpu_base; |
203 | int cpu, preferred_cpu = -1; | 243 | int this_cpu = smp_processor_id(); |
204 | 244 | int cpu = hrtimer_get_target(this_cpu, pinned); | |
205 | cpu = smp_processor_id(); | ||
206 | #if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP) | ||
207 | if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) { | ||
208 | preferred_cpu = get_nohz_load_balancer(); | ||
209 | if (preferred_cpu >= 0) | ||
210 | cpu = preferred_cpu; | ||
211 | } | ||
212 | #endif | ||
213 | 245 | ||
214 | again: | 246 | again: |
215 | new_cpu_base = &per_cpu(hrtimer_bases, cpu); | 247 | new_cpu_base = &per_cpu(hrtimer_bases, cpu); |
@@ -217,7 +249,7 @@ again: | |||
217 | 249 | ||
218 | if (base != new_base) { | 250 | if (base != new_base) { |
219 | /* | 251 | /* |
220 | * We are trying to schedule the timer on the local CPU. | 252 | * We are trying to move timer to new_base. |
221 | * However we can't change timer's base while it is running, | 253 | * However we can't change timer's base while it is running, |
222 | * so we keep it on the same CPU. No hassle vs. reprogramming | 254 | * so we keep it on the same CPU. No hassle vs. reprogramming |
223 | * the event source in the high resolution case. The softirq | 255 | * the event source in the high resolution case. The softirq |
@@ -233,38 +265,12 @@ again: | |||
233 | spin_unlock(&base->cpu_base->lock); | 265 | spin_unlock(&base->cpu_base->lock); |
234 | spin_lock(&new_base->cpu_base->lock); | 266 | spin_lock(&new_base->cpu_base->lock); |
235 | 267 | ||
236 | /* Optimized away for NOHZ=n SMP=n */ | 268 | if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) { |
237 | if (cpu == preferred_cpu) { | 269 | cpu = this_cpu; |
238 | /* Calculate clock monotonic expiry time */ | 270 | spin_unlock(&new_base->cpu_base->lock); |
239 | #ifdef CONFIG_HIGH_RES_TIMERS | 271 | spin_lock(&base->cpu_base->lock); |
240 | ktime_t expires = ktime_sub(hrtimer_get_expires(timer), | 272 | timer->base = base; |
241 | new_base->offset); | 273 | goto again; |
242 | #else | ||
243 | ktime_t expires = hrtimer_get_expires(timer); | ||
244 | #endif | ||
245 | |||
246 | /* | ||
247 | * Get the next event on target cpu from the | ||
248 | * clock events layer. | ||
249 | * This covers the highres=off nohz=on case as well. | ||
250 | */ | ||
251 | ktime_t next = clockevents_get_next_event(cpu); | ||
252 | |||
253 | ktime_t delta = ktime_sub(expires, next); | ||
254 | |||
255 | /* | ||
256 | * We do not migrate the timer when it is expiring | ||
257 | * before the next event on the target cpu because | ||
258 | * we cannot reprogram the target cpu hardware and | ||
259 | * we would cause it to fire late. | ||
260 | */ | ||
261 | if (delta.tv64 < 0) { | ||
262 | cpu = smp_processor_id(); | ||
263 | spin_unlock(&new_base->cpu_base->lock); | ||
264 | spin_lock(&base->cpu_base->lock); | ||
265 | timer->base = base; | ||
266 | goto again; | ||
267 | } | ||
268 | } | 274 | } |
269 | timer->base = new_base; | 275 | timer->base = new_base; |
270 | } | 276 | } |
@@ -1276,14 +1282,22 @@ void hrtimer_interrupt(struct clock_event_device *dev) | |||
1276 | 1282 | ||
1277 | expires_next.tv64 = KTIME_MAX; | 1283 | expires_next.tv64 = KTIME_MAX; |
1278 | 1284 | ||
1285 | spin_lock(&cpu_base->lock); | ||
1286 | /* | ||
1287 | * We set expires_next to KTIME_MAX here with cpu_base->lock | ||
1288 | * held to prevent that a timer is enqueued in our queue via | ||
1289 | * the migration code. This does not affect enqueueing of | ||
1290 | * timers which run their callback and need to be requeued on | ||
1291 | * this CPU. | ||
1292 | */ | ||
1293 | cpu_base->expires_next.tv64 = KTIME_MAX; | ||
1294 | |||
1279 | base = cpu_base->clock_base; | 1295 | base = cpu_base->clock_base; |
1280 | 1296 | ||
1281 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { | 1297 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { |
1282 | ktime_t basenow; | 1298 | ktime_t basenow; |
1283 | struct rb_node *node; | 1299 | struct rb_node *node; |
1284 | 1300 | ||
1285 | spin_lock(&cpu_base->lock); | ||
1286 | |||
1287 | basenow = ktime_add(now, base->offset); | 1301 | basenow = ktime_add(now, base->offset); |
1288 | 1302 | ||
1289 | while ((node = base->first)) { | 1303 | while ((node = base->first)) { |
@@ -1316,11 +1330,15 @@ void hrtimer_interrupt(struct clock_event_device *dev) | |||
1316 | 1330 | ||
1317 | __run_hrtimer(timer); | 1331 | __run_hrtimer(timer); |
1318 | } | 1332 | } |
1319 | spin_unlock(&cpu_base->lock); | ||
1320 | base++; | 1333 | base++; |
1321 | } | 1334 | } |
1322 | 1335 | ||
1336 | /* | ||
1337 | * Store the new expiry value so the migration code can verify | ||
1338 | * against it. | ||
1339 | */ | ||
1323 | cpu_base->expires_next = expires_next; | 1340 | cpu_base->expires_next = expires_next; |
1341 | spin_unlock(&cpu_base->lock); | ||
1324 | 1342 | ||
1325 | /* Reprogramming necessary ? */ | 1343 | /* Reprogramming necessary ? */ |
1326 | if (expires_next.tv64 != KTIME_MAX) { | 1344 | if (expires_next.tv64 != KTIME_MAX) { |