aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/timer.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/timer.c')
-rw-r--r--kernel/timer.c351
1 files changed, 177 insertions, 174 deletions
diff --git a/kernel/timer.c b/kernel/timer.c
index 207aa4f0aa10..51ff917c9590 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -57,6 +57,11 @@ static void time_interpolator_update(long delta_nsec);
57#define TVN_MASK (TVN_SIZE - 1) 57#define TVN_MASK (TVN_SIZE - 1)
58#define TVR_MASK (TVR_SIZE - 1) 58#define TVR_MASK (TVR_SIZE - 1)
59 59
60struct timer_base_s {
61 spinlock_t lock;
62 struct timer_list *running_timer;
63};
64
60typedef struct tvec_s { 65typedef struct tvec_s {
61 struct list_head vec[TVN_SIZE]; 66 struct list_head vec[TVN_SIZE];
62} tvec_t; 67} tvec_t;
@@ -66,9 +71,8 @@ typedef struct tvec_root_s {
66} tvec_root_t; 71} tvec_root_t;
67 72
68struct tvec_t_base_s { 73struct tvec_t_base_s {
69 spinlock_t lock; 74 struct timer_base_s t_base;
70 unsigned long timer_jiffies; 75 unsigned long timer_jiffies;
71 struct timer_list *running_timer;
72 tvec_root_t tv1; 76 tvec_root_t tv1;
73 tvec_t tv2; 77 tvec_t tv2;
74 tvec_t tv3; 78 tvec_t tv3;
@@ -77,18 +81,16 @@ struct tvec_t_base_s {
77} ____cacheline_aligned_in_smp; 81} ____cacheline_aligned_in_smp;
78 82
79typedef struct tvec_t_base_s tvec_base_t; 83typedef struct tvec_t_base_s tvec_base_t;
84static DEFINE_PER_CPU(tvec_base_t, tvec_bases);
80 85
81static inline void set_running_timer(tvec_base_t *base, 86static inline void set_running_timer(tvec_base_t *base,
82 struct timer_list *timer) 87 struct timer_list *timer)
83{ 88{
84#ifdef CONFIG_SMP 89#ifdef CONFIG_SMP
85 base->running_timer = timer; 90 base->t_base.running_timer = timer;
86#endif 91#endif
87} 92}
88 93
89/* Fake initialization */
90static DEFINE_PER_CPU(tvec_base_t, tvec_bases) = { SPIN_LOCK_UNLOCKED };
91
92static void check_timer_failed(struct timer_list *timer) 94static void check_timer_failed(struct timer_list *timer)
93{ 95{
94 static int whine_count; 96 static int whine_count;
@@ -103,7 +105,6 @@ static void check_timer_failed(struct timer_list *timer)
103 /* 105 /*
104 * Now fix it up 106 * Now fix it up
105 */ 107 */
106 spin_lock_init(&timer->lock);
107 timer->magic = TIMER_MAGIC; 108 timer->magic = TIMER_MAGIC;
108} 109}
109 110
@@ -156,65 +157,113 @@ static void internal_add_timer(tvec_base_t *base, struct timer_list *timer)
156 list_add_tail(&timer->entry, vec); 157 list_add_tail(&timer->entry, vec);
157} 158}
158 159
160typedef struct timer_base_s timer_base_t;
161/*
162 * Used by TIMER_INITIALIZER, we can't use per_cpu(tvec_bases)
163 * at compile time, and we need timer->base to lock the timer.
164 */
165timer_base_t __init_timer_base
166 ____cacheline_aligned_in_smp = { .lock = SPIN_LOCK_UNLOCKED };
167EXPORT_SYMBOL(__init_timer_base);
168
169/***
170 * init_timer - initialize a timer.
171 * @timer: the timer to be initialized
172 *
173 * init_timer() must be done to a timer prior calling *any* of the
174 * other timer functions.
175 */
176void fastcall init_timer(struct timer_list *timer)
177{
178 timer->entry.next = NULL;
179 timer->base = &per_cpu(tvec_bases, raw_smp_processor_id()).t_base;
180 timer->magic = TIMER_MAGIC;
181}
182EXPORT_SYMBOL(init_timer);
183
184static inline void detach_timer(struct timer_list *timer,
185 int clear_pending)
186{
187 struct list_head *entry = &timer->entry;
188
189 __list_del(entry->prev, entry->next);
190 if (clear_pending)
191 entry->next = NULL;
192 entry->prev = LIST_POISON2;
193}
194
195/*
196 * We are using hashed locking: holding per_cpu(tvec_bases).t_base.lock
197 * means that all timers which are tied to this base via timer->base are
198 * locked, and the base itself is locked too.
199 *
200 * So __run_timers/migrate_timers can safely modify all timers which could
201 * be found on ->tvX lists.
202 *
203 * When the timer's base is locked, and the timer removed from list, it is
204 * possible to set timer->base = NULL and drop the lock: the timer remains
205 * locked.
206 */
207static timer_base_t *lock_timer_base(struct timer_list *timer,
208 unsigned long *flags)
209{
210 timer_base_t *base;
211
212 for (;;) {
213 base = timer->base;
214 if (likely(base != NULL)) {
215 spin_lock_irqsave(&base->lock, *flags);
216 if (likely(base == timer->base))
217 return base;
218 /* The timer has migrated to another CPU */
219 spin_unlock_irqrestore(&base->lock, *flags);
220 }
221 cpu_relax();
222 }
223}
224
159int __mod_timer(struct timer_list *timer, unsigned long expires) 225int __mod_timer(struct timer_list *timer, unsigned long expires)
160{ 226{
161 tvec_base_t *old_base, *new_base; 227 timer_base_t *base;
228 tvec_base_t *new_base;
162 unsigned long flags; 229 unsigned long flags;
163 int ret = 0; 230 int ret = 0;
164 231
165 BUG_ON(!timer->function); 232 BUG_ON(!timer->function);
166
167 check_timer(timer); 233 check_timer(timer);
168 234
169 spin_lock_irqsave(&timer->lock, flags); 235 base = lock_timer_base(timer, &flags);
236
237 if (timer_pending(timer)) {
238 detach_timer(timer, 0);
239 ret = 1;
240 }
241
170 new_base = &__get_cpu_var(tvec_bases); 242 new_base = &__get_cpu_var(tvec_bases);
171repeat:
172 old_base = timer->base;
173 243
174 /* 244 if (base != &new_base->t_base) {
175 * Prevent deadlocks via ordering by old_base < new_base.
176 */
177 if (old_base && (new_base != old_base)) {
178 if (old_base < new_base) {
179 spin_lock(&new_base->lock);
180 spin_lock(&old_base->lock);
181 } else {
182 spin_lock(&old_base->lock);
183 spin_lock(&new_base->lock);
184 }
185 /* 245 /*
186 * The timer base might have been cancelled while we were 246 * We are trying to schedule the timer on the local CPU.
187 * trying to take the lock(s): 247 * However we can't change timer's base while it is running,
248 * otherwise del_timer_sync() can't detect that the timer's
249 * handler yet has not finished. This also guarantees that
250 * the timer is serialized wrt itself.
188 */ 251 */
189 if (timer->base != old_base) { 252 if (unlikely(base->running_timer == timer)) {
190 spin_unlock(&new_base->lock); 253 /* The timer remains on a former base */
191 spin_unlock(&old_base->lock); 254 new_base = container_of(base, tvec_base_t, t_base);
192 goto repeat; 255 } else {
193 } 256 /* See the comment in lock_timer_base() */
194 } else { 257 timer->base = NULL;
195 spin_lock(&new_base->lock); 258 spin_unlock(&base->lock);
196 if (timer->base != old_base) { 259 spin_lock(&new_base->t_base.lock);
197 spin_unlock(&new_base->lock); 260 timer->base = &new_base->t_base;
198 goto repeat;
199 } 261 }
200 } 262 }
201 263
202 /*
203 * Delete the previous timeout (if there was any), and install
204 * the new one:
205 */
206 if (old_base) {
207 list_del(&timer->entry);
208 ret = 1;
209 }
210 timer->expires = expires; 264 timer->expires = expires;
211 internal_add_timer(new_base, timer); 265 internal_add_timer(new_base, timer);
212 timer->base = new_base; 266 spin_unlock_irqrestore(&new_base->t_base.lock, flags);
213
214 if (old_base && (new_base != old_base))
215 spin_unlock(&old_base->lock);
216 spin_unlock(&new_base->lock);
217 spin_unlock_irqrestore(&timer->lock, flags);
218 267
219 return ret; 268 return ret;
220} 269}
@@ -232,15 +281,15 @@ void add_timer_on(struct timer_list *timer, int cpu)
232{ 281{
233 tvec_base_t *base = &per_cpu(tvec_bases, cpu); 282 tvec_base_t *base = &per_cpu(tvec_bases, cpu);
234 unsigned long flags; 283 unsigned long flags;
235 284
236 BUG_ON(timer_pending(timer) || !timer->function); 285 BUG_ON(timer_pending(timer) || !timer->function);
237 286
238 check_timer(timer); 287 check_timer(timer);
239 288
240 spin_lock_irqsave(&base->lock, flags); 289 spin_lock_irqsave(&base->t_base.lock, flags);
290 timer->base = &base->t_base;
241 internal_add_timer(base, timer); 291 internal_add_timer(base, timer);
242 timer->base = base; 292 spin_unlock_irqrestore(&base->t_base.lock, flags);
243 spin_unlock_irqrestore(&base->lock, flags);
244} 293}
245 294
246 295
@@ -295,109 +344,84 @@ EXPORT_SYMBOL(mod_timer);
295 */ 344 */
296int del_timer(struct timer_list *timer) 345int del_timer(struct timer_list *timer)
297{ 346{
347 timer_base_t *base;
298 unsigned long flags; 348 unsigned long flags;
299 tvec_base_t *base; 349 int ret = 0;
300 350
301 check_timer(timer); 351 check_timer(timer);
302 352
303repeat: 353 if (timer_pending(timer)) {
304 base = timer->base; 354 base = lock_timer_base(timer, &flags);
305 if (!base) 355 if (timer_pending(timer)) {
306 return 0; 356 detach_timer(timer, 1);
307 spin_lock_irqsave(&base->lock, flags); 357 ret = 1;
308 if (base != timer->base) { 358 }
309 spin_unlock_irqrestore(&base->lock, flags); 359 spin_unlock_irqrestore(&base->lock, flags);
310 goto repeat;
311 } 360 }
312 list_del(&timer->entry);
313 /* Need to make sure that anybody who sees a NULL base also sees the list ops */
314 smp_wmb();
315 timer->base = NULL;
316 spin_unlock_irqrestore(&base->lock, flags);
317 361
318 return 1; 362 return ret;
319} 363}
320 364
321EXPORT_SYMBOL(del_timer); 365EXPORT_SYMBOL(del_timer);
322 366
323#ifdef CONFIG_SMP 367#ifdef CONFIG_SMP
324/*** 368/*
325 * del_timer_sync - deactivate a timer and wait for the handler to finish. 369 * This function tries to deactivate a timer. Upon successful (ret >= 0)
326 * @timer: the timer to be deactivated 370 * exit the timer is not queued and the handler is not running on any CPU.
327 *
328 * This function only differs from del_timer() on SMP: besides deactivating
329 * the timer it also makes sure the handler has finished executing on other
330 * CPUs.
331 *
332 * Synchronization rules: callers must prevent restarting of the timer,
333 * otherwise this function is meaningless. It must not be called from
334 * interrupt contexts. The caller must not hold locks which would prevent
335 * completion of the timer's handler. Upon exit the timer is not queued and
336 * the handler is not running on any CPU.
337 *
338 * The function returns whether it has deactivated a pending timer or not.
339 * 371 *
340 * del_timer_sync() is slow and complicated because it copes with timer 372 * It must not be called from interrupt contexts.
341 * handlers which re-arm the timer (periodic timers). If the timer handler
342 * is known to not do this (a single shot timer) then use
343 * del_singleshot_timer_sync() instead.
344 */ 373 */
345int del_timer_sync(struct timer_list *timer) 374int try_to_del_timer_sync(struct timer_list *timer)
346{ 375{
347 tvec_base_t *base; 376 timer_base_t *base;
348 int i, ret = 0; 377 unsigned long flags;
378 int ret = -1;
349 379
350 check_timer(timer); 380 base = lock_timer_base(timer, &flags);
351 381
352del_again: 382 if (base->running_timer == timer)
353 ret += del_timer(timer); 383 goto out;
354 384
355 for_each_online_cpu(i) { 385 ret = 0;
356 base = &per_cpu(tvec_bases, i); 386 if (timer_pending(timer)) {
357 if (base->running_timer == timer) { 387 detach_timer(timer, 1);
358 while (base->running_timer == timer) { 388 ret = 1;
359 cpu_relax();
360 preempt_check_resched();
361 }
362 break;
363 }
364 } 389 }
365 smp_rmb(); 390out:
366 if (timer_pending(timer)) 391 spin_unlock_irqrestore(&base->lock, flags);
367 goto del_again;
368 392
369 return ret; 393 return ret;
370} 394}
371EXPORT_SYMBOL(del_timer_sync);
372 395
373/*** 396/***
374 * del_singleshot_timer_sync - deactivate a non-recursive timer 397 * del_timer_sync - deactivate a timer and wait for the handler to finish.
375 * @timer: the timer to be deactivated 398 * @timer: the timer to be deactivated
376 * 399 *
377 * This function is an optimization of del_timer_sync for the case where the 400 * This function only differs from del_timer() on SMP: besides deactivating
378 * caller can guarantee the timer does not reschedule itself in its timer 401 * the timer it also makes sure the handler has finished executing on other
379 * function. 402 * CPUs.
380 * 403 *
381 * Synchronization rules: callers must prevent restarting of the timer, 404 * Synchronization rules: callers must prevent restarting of the timer,
382 * otherwise this function is meaningless. It must not be called from 405 * otherwise this function is meaningless. It must not be called from
383 * interrupt contexts. The caller must not hold locks which wold prevent 406 * interrupt contexts. The caller must not hold locks which would prevent
384 * completion of the timer's handler. Upon exit the timer is not queued and 407 * completion of the timer's handler. The timer's handler must not call
385 * the handler is not running on any CPU. 408 * add_timer_on(). Upon exit the timer is not queued and the handler is
409 * not running on any CPU.
386 * 410 *
387 * The function returns whether it has deactivated a pending timer or not. 411 * The function returns whether it has deactivated a pending timer or not.
388 */ 412 */
389int del_singleshot_timer_sync(struct timer_list *timer) 413int del_timer_sync(struct timer_list *timer)
390{ 414{
391 int ret = del_timer(timer); 415 check_timer(timer);
392 416
393 if (!ret) { 417 for (;;) {
394 ret = del_timer_sync(timer); 418 int ret = try_to_del_timer_sync(timer);
395 BUG_ON(ret); 419 if (ret >= 0)
420 return ret;
396 } 421 }
397
398 return ret;
399} 422}
400EXPORT_SYMBOL(del_singleshot_timer_sync); 423
424EXPORT_SYMBOL(del_timer_sync);
401#endif 425#endif
402 426
403static int cascade(tvec_base_t *base, tvec_t *tv, int index) 427static int cascade(tvec_base_t *base, tvec_t *tv, int index)
@@ -415,7 +439,7 @@ static int cascade(tvec_base_t *base, tvec_t *tv, int index)
415 struct timer_list *tmp; 439 struct timer_list *tmp;
416 440
417 tmp = list_entry(curr, struct timer_list, entry); 441 tmp = list_entry(curr, struct timer_list, entry);
418 BUG_ON(tmp->base != base); 442 BUG_ON(tmp->base != &base->t_base);
419 curr = curr->next; 443 curr = curr->next;
420 internal_add_timer(base, tmp); 444 internal_add_timer(base, tmp);
421 } 445 }
@@ -437,7 +461,7 @@ static inline void __run_timers(tvec_base_t *base)
437{ 461{
438 struct timer_list *timer; 462 struct timer_list *timer;
439 463
440 spin_lock_irq(&base->lock); 464 spin_lock_irq(&base->t_base.lock);
441 while (time_after_eq(jiffies, base->timer_jiffies)) { 465 while (time_after_eq(jiffies, base->timer_jiffies)) {
442 struct list_head work_list = LIST_HEAD_INIT(work_list); 466 struct list_head work_list = LIST_HEAD_INIT(work_list);
443 struct list_head *head = &work_list; 467 struct list_head *head = &work_list;
@@ -453,8 +477,7 @@ static inline void __run_timers(tvec_base_t *base)
453 cascade(base, &base->tv5, INDEX(3)); 477 cascade(base, &base->tv5, INDEX(3));
454 ++base->timer_jiffies; 478 ++base->timer_jiffies;
455 list_splice_init(base->tv1.vec + index, &work_list); 479 list_splice_init(base->tv1.vec + index, &work_list);
456repeat: 480 while (!list_empty(head)) {
457 if (!list_empty(head)) {
458 void (*fn)(unsigned long); 481 void (*fn)(unsigned long);
459 unsigned long data; 482 unsigned long data;
460 483
@@ -462,25 +485,26 @@ repeat:
462 fn = timer->function; 485 fn = timer->function;
463 data = timer->data; 486 data = timer->data;
464 487
465 list_del(&timer->entry);
466 set_running_timer(base, timer); 488 set_running_timer(base, timer);
467 smp_wmb(); 489 detach_timer(timer, 1);
468 timer->base = NULL; 490 spin_unlock_irq(&base->t_base.lock);
469 spin_unlock_irq(&base->lock);
470 { 491 {
471 u32 preempt_count = preempt_count(); 492 int preempt_count = preempt_count();
472 fn(data); 493 fn(data);
473 if (preempt_count != preempt_count()) { 494 if (preempt_count != preempt_count()) {
474 printk("huh, entered %p with %08x, exited with %08x?\n", fn, preempt_count, preempt_count()); 495 printk(KERN_WARNING "huh, entered %p "
496 "with preempt_count %08x, exited"
497 " with %08x?\n",
498 fn, preempt_count,
499 preempt_count());
475 BUG(); 500 BUG();
476 } 501 }
477 } 502 }
478 spin_lock_irq(&base->lock); 503 spin_lock_irq(&base->t_base.lock);
479 goto repeat;
480 } 504 }
481 } 505 }
482 set_running_timer(base, NULL); 506 set_running_timer(base, NULL);
483 spin_unlock_irq(&base->lock); 507 spin_unlock_irq(&base->t_base.lock);
484} 508}
485 509
486#ifdef CONFIG_NO_IDLE_HZ 510#ifdef CONFIG_NO_IDLE_HZ
@@ -499,7 +523,7 @@ unsigned long next_timer_interrupt(void)
499 int i, j; 523 int i, j;
500 524
501 base = &__get_cpu_var(tvec_bases); 525 base = &__get_cpu_var(tvec_bases);
502 spin_lock(&base->lock); 526 spin_lock(&base->t_base.lock);
503 expires = base->timer_jiffies + (LONG_MAX >> 1); 527 expires = base->timer_jiffies + (LONG_MAX >> 1);
504 list = 0; 528 list = 0;
505 529
@@ -547,7 +571,7 @@ found:
547 expires = nte->expires; 571 expires = nte->expires;
548 } 572 }
549 } 573 }
550 spin_unlock(&base->lock); 574 spin_unlock(&base->t_base.lock);
551 return expires; 575 return expires;
552} 576}
553#endif 577#endif
@@ -1286,9 +1310,9 @@ static void __devinit init_timers_cpu(int cpu)
1286{ 1310{
1287 int j; 1311 int j;
1288 tvec_base_t *base; 1312 tvec_base_t *base;
1289 1313
1290 base = &per_cpu(tvec_bases, cpu); 1314 base = &per_cpu(tvec_bases, cpu);
1291 spin_lock_init(&base->lock); 1315 spin_lock_init(&base->t_base.lock);
1292 for (j = 0; j < TVN_SIZE; j++) { 1316 for (j = 0; j < TVN_SIZE; j++) {
1293 INIT_LIST_HEAD(base->tv5.vec + j); 1317 INIT_LIST_HEAD(base->tv5.vec + j);
1294 INIT_LIST_HEAD(base->tv4.vec + j); 1318 INIT_LIST_HEAD(base->tv4.vec + j);
@@ -1302,22 +1326,16 @@ static void __devinit init_timers_cpu(int cpu)
1302} 1326}
1303 1327
1304#ifdef CONFIG_HOTPLUG_CPU 1328#ifdef CONFIG_HOTPLUG_CPU
1305static int migrate_timer_list(tvec_base_t *new_base, struct list_head *head) 1329static void migrate_timer_list(tvec_base_t *new_base, struct list_head *head)
1306{ 1330{
1307 struct timer_list *timer; 1331 struct timer_list *timer;
1308 1332
1309 while (!list_empty(head)) { 1333 while (!list_empty(head)) {
1310 timer = list_entry(head->next, struct timer_list, entry); 1334 timer = list_entry(head->next, struct timer_list, entry);
1311 /* We're locking backwards from __mod_timer order here, 1335 detach_timer(timer, 0);
1312 beware deadlock. */ 1336 timer->base = &new_base->t_base;
1313 if (!spin_trylock(&timer->lock))
1314 return 0;
1315 list_del(&timer->entry);
1316 internal_add_timer(new_base, timer); 1337 internal_add_timer(new_base, timer);
1317 timer->base = new_base;
1318 spin_unlock(&timer->lock);
1319 } 1338 }
1320 return 1;
1321} 1339}
1322 1340
1323static void __devinit migrate_timers(int cpu) 1341static void __devinit migrate_timers(int cpu)
@@ -1331,39 +1349,24 @@ static void __devinit migrate_timers(int cpu)
1331 new_base = &get_cpu_var(tvec_bases); 1349 new_base = &get_cpu_var(tvec_bases);
1332 1350
1333 local_irq_disable(); 1351 local_irq_disable();
1334again: 1352 spin_lock(&new_base->t_base.lock);
1335 /* Prevent deadlocks via ordering by old_base < new_base. */ 1353 spin_lock(&old_base->t_base.lock);
1336 if (old_base < new_base) {
1337 spin_lock(&new_base->lock);
1338 spin_lock(&old_base->lock);
1339 } else {
1340 spin_lock(&old_base->lock);
1341 spin_lock(&new_base->lock);
1342 }
1343 1354
1344 if (old_base->running_timer) 1355 if (old_base->t_base.running_timer)
1345 BUG(); 1356 BUG();
1346 for (i = 0; i < TVR_SIZE; i++) 1357 for (i = 0; i < TVR_SIZE; i++)
1347 if (!migrate_timer_list(new_base, old_base->tv1.vec + i)) 1358 migrate_timer_list(new_base, old_base->tv1.vec + i);
1348 goto unlock_again; 1359 for (i = 0; i < TVN_SIZE; i++) {
1349 for (i = 0; i < TVN_SIZE; i++) 1360 migrate_timer_list(new_base, old_base->tv2.vec + i);
1350 if (!migrate_timer_list(new_base, old_base->tv2.vec + i) 1361 migrate_timer_list(new_base, old_base->tv3.vec + i);
1351 || !migrate_timer_list(new_base, old_base->tv3.vec + i) 1362 migrate_timer_list(new_base, old_base->tv4.vec + i);
1352 || !migrate_timer_list(new_base, old_base->tv4.vec + i) 1363 migrate_timer_list(new_base, old_base->tv5.vec + i);
1353 || !migrate_timer_list(new_base, old_base->tv5.vec + i)) 1364 }
1354 goto unlock_again; 1365
1355 spin_unlock(&old_base->lock); 1366 spin_unlock(&old_base->t_base.lock);
1356 spin_unlock(&new_base->lock); 1367 spin_unlock(&new_base->t_base.lock);
1357 local_irq_enable(); 1368 local_irq_enable();
1358 put_cpu_var(tvec_bases); 1369 put_cpu_var(tvec_bases);
1359 return;
1360
1361unlock_again:
1362 /* Avoid deadlock with __mod_timer, by backing off. */
1363 spin_unlock(&old_base->lock);
1364 spin_unlock(&new_base->lock);
1365 cpu_relax();
1366 goto again;
1367} 1370}
1368#endif /* CONFIG_HOTPLUG_CPU */ 1371#endif /* CONFIG_HOTPLUG_CPU */
1369 1372