author    Oleg Nesterov <oleg@tv-sign.ru>  2005-06-23 03:08:56 -0400
committer Linus Torvalds <torvalds@ppc970.osdl.org>  2005-06-23 12:45:16 -0400
commit    55c888d6d09a0df236adfaf8ccf06ff5d0646775
tree      deb9434abe3cf7c9b714ccb267ef5d943a847dfe
parent    bdd646a44672115c986593956aa4ef105485a184
[PATCH] timers fixes/improvements
This patch tries to solve the following problems:

1. del_timer_sync() is racy. The timer can be fired again after
   del_timer_sync() has checked all CPUs and before it rechecks
   timer_pending().

2. It has scalability problems. All CPUs are scanned to determine
   whether the timer is running on that CPU.

   With this patch del_timer_sync() is O(1) and no slower than plain
   del_timer(pending_timer), unless it has to actually wait for
   completion of the currently running timer.

   The only restriction is that a recurring timer should not use
   add_timer_on().

3. The timers are not serialized wrt themselves.

   If CPU_0 does mod_timer(jiffies+1) while the timer is currently
   running on CPU_1, it is quite possible that the local interrupt on
   CPU_0 will start that timer before it has finished on CPU_1.

4. The timers locking is suboptimal. __mod_timer() takes 3 locks at
   once and still requires wmb() in del_timer/run_timers.

   The new implementation takes 2 locks sequentially and does not
   need memory barriers.

Currently ->base != NULL means that the timer is pending. In that case
->base.lock is used to lock the timer. __mod_timer() also takes
timer->lock because ->base can be == NULL.

This patch uses timer->entry.next != NULL as the indication that the
timer is pending. So it does __list_del(), entry->next = NULL instead
of list_del() when the timer is deleted.

The ->base field is used for hashed locking only; it is initialized
in init_timer(), which sets ->base = per_cpu(tvec_bases). When
tvec_bases.lock is locked, it means that all timers which are tied to
this base via timer->base are locked, and the base itself is locked
too.

So __run_timers/migrate_timers can safely modify all timers which
could be found on the ->tvX lists (pending timers).

When the timer's base is locked and the timer has been removed from
the ->entry list (which means that __run_timers/migrate_timers can't
see this timer), it is possible to set timer->base = NULL and drop the
lock: the timer remains locked.

This patch adds a lock_timer_base() helper, which waits for
->base != NULL, locks the ->base, and checks that it is still the
same.

__mod_timer() schedules the timer on the local CPU and changes its
base. However, it does not lock both the old and the new base at
once. It locks the timer via lock_timer_base(), deletes the timer,
sets ->base = NULL, and unlocks the old base. Then __mod_timer() locks
new_base, sets ->base = new_base, and adds this timer. This simplifies
the code, because an AB-BA deadlock is not possible. __mod_timer()
also ensures that the timer's base is not changed while the timer's
handler is running on the old base.

__run_timers() and del_timer() do not change ->base anymore; they only
clear the pending flag.

So del_timer_sync() can test timer->base->running_timer == timer to
detect whether the timer is running or not.

We don't need timer_list->lock anymore; this patch kills it.

We also don't need barriers. del_timer() and __run_timers() used
smp_wmb() before clearing the timer's pending flag. It was needed
because __mod_timer() did not lock old_base if the timer was not
pending, so __mod_timer()->list_add() could race with
del_timer()->list_del(). With this patch these functions are
serialized through base->lock.

One problem: TIMER_INITIALIZER can't use per_cpu(tvec_bases). So this
patch adds a global

        struct timer_base_s {
                spinlock_t lock;
                struct timer_list *running_timer;
        } __init_timer_base;

which is used by TIMER_INITIALIZER. The corresponding fields in the
tvec_t_base_s struct are replaced by struct timer_base_s t_base.

It is indeed ugly. But this can't have scalability problems: the
global __init_timer_base.lock is used only when __mod_timer() is
called for the first time AND the timer was compile-time initialized.
After that the timer migrates to the local CPU.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Renaud Lienhart <renaud.lienhart@free.fr>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
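For illustration only (not part of the commit): a minimal sketch of how a
driver might use the reworked API under the new rules. A compile-time
initialized timer now gets &__init_timer_base as its ->base via
TIMER_INITIALIZER, a periodic handler re-arms itself with mod_timer() rather
than add_timer_on(), and teardown uses the now O(1) del_timer_sync(). The
my_poll_* names and the one-second period are hypothetical.

	#include <linux/timer.h>
	#include <linux/jiffies.h>

	/* Hypothetical example; names and period are made up. */
	static void my_poll_fn(unsigned long data);

	/* Compile-time init: ->base points at __init_timer_base for now. */
	static struct timer_list my_poll_timer =
		TIMER_INITIALIZER(my_poll_fn, 0, 0);

	static void my_poll_fn(unsigned long data)
	{
		/* ... periodic work ... */

		/*
		 * Re-arm with mod_timer(), not add_timer_on():
		 * del_timer_sync() relies on the recurring timer not being
		 * moved to another CPU behind its back.
		 */
		mod_timer(&my_poll_timer, jiffies + HZ);
	}

	static void my_poll_start(void)
	{
		/*
		 * The first mod_timer() migrates the timer from
		 * __init_timer_base to the local CPU's tvec_bases.
		 */
		mod_timer(&my_poll_timer, jiffies + HZ);
	}

	static void my_poll_stop(void)
	{
		/*
		 * O(1) now: lock_timer_base() plus the running_timer check
		 * replace the old scan of every online CPU.
		 */
		del_timer_sync(&my_poll_timer);
	}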
-rw-r--r--  include/linux/timer.h |  30
-rw-r--r--  kernel/timer.c        | 328
2 files changed, 166 insertions(+), 192 deletions(-)
diff --git a/include/linux/timer.h b/include/linux/timer.h
index 90db1cc62ddd..2e78fedfc069 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -6,45 +6,33 @@
 #include <linux/spinlock.h>
 #include <linux/stddef.h>
 
-struct tvec_t_base_s;
+struct timer_base_s;
 
 struct timer_list {
 	struct list_head entry;
 	unsigned long expires;
 
-	spinlock_t lock;
 	unsigned long magic;
 
 	void (*function)(unsigned long);
 	unsigned long data;
 
-	struct tvec_t_base_s *base;
+	struct timer_base_s *base;
 };
 
 #define TIMER_MAGIC	0x4b87ad6e
 
+extern struct timer_base_s __init_timer_base;
+
 #define TIMER_INITIALIZER(_function, _expires, _data) {	\
 		.function = (_function),			\
 		.expires = (_expires),				\
 		.data = (_data),				\
-		.base = NULL,					\
+		.base = &__init_timer_base,			\
 		.magic = TIMER_MAGIC,				\
-		.lock = SPIN_LOCK_UNLOCKED,			\
 	}
 
-/***
- * init_timer - initialize a timer.
- * @timer: the timer to be initialized
- *
- * init_timer() must be done to a timer prior calling *any* of the
- * other timer functions.
- */
-static inline void init_timer(struct timer_list * timer)
-{
-	timer->base = NULL;
-	timer->magic = TIMER_MAGIC;
-	spin_lock_init(&timer->lock);
-}
+void fastcall init_timer(struct timer_list * timer);
 
 /***
  * timer_pending - is a timer pending?
@@ -58,7 +46,7 @@ static inline void init_timer(struct timer_list * timer)
  */
 static inline int timer_pending(const struct timer_list * timer)
 {
-	return timer->base != NULL;
+	return timer->entry.next != NULL;
 }
 
 extern void add_timer_on(struct timer_list *timer, int cpu);
@@ -89,12 +77,12 @@ static inline void add_timer(struct timer_list * timer)
 
 #ifdef CONFIG_SMP
   extern int del_timer_sync(struct timer_list *timer);
-  extern int del_singleshot_timer_sync(struct timer_list *timer);
 #else
 # define del_timer_sync(t) del_timer(t)
-# define del_singleshot_timer_sync(t) del_timer(t)
 #endif
 
+#define del_singleshot_timer_sync(t) del_timer_sync(t)
+
 extern void init_timers(void);
 extern void run_local_timers(void);
 extern void it_real_fn(unsigned long);
diff --git a/kernel/timer.c b/kernel/timer.c
index 207aa4f0aa10..8aadc62efd65 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -57,6 +57,11 @@ static void time_interpolator_update(long delta_nsec);
 #define TVN_MASK (TVN_SIZE - 1)
 #define TVR_MASK (TVR_SIZE - 1)
 
+struct timer_base_s {
+	spinlock_t lock;
+	struct timer_list *running_timer;
+};
+
 typedef struct tvec_s {
 	struct list_head vec[TVN_SIZE];
 } tvec_t;
@@ -66,9 +71,8 @@ typedef struct tvec_root_s {
 } tvec_root_t;
 
 struct tvec_t_base_s {
-	spinlock_t lock;
+	struct timer_base_s t_base;
 	unsigned long timer_jiffies;
-	struct timer_list *running_timer;
 	tvec_root_t tv1;
 	tvec_t tv2;
 	tvec_t tv3;
@@ -77,18 +81,16 @@ struct tvec_t_base_s {
 } ____cacheline_aligned_in_smp;
 
 typedef struct tvec_t_base_s tvec_base_t;
+static DEFINE_PER_CPU(tvec_base_t, tvec_bases);
 
 static inline void set_running_timer(tvec_base_t *base,
 					struct timer_list *timer)
 {
 #ifdef CONFIG_SMP
-	base->running_timer = timer;
+	base->t_base.running_timer = timer;
 #endif
 }
 
-/* Fake initialization */
-static DEFINE_PER_CPU(tvec_base_t, tvec_bases) = { SPIN_LOCK_UNLOCKED };
-
 static void check_timer_failed(struct timer_list *timer)
 {
 	static int whine_count;
@@ -103,7 +105,6 @@ static void check_timer_failed(struct timer_list *timer)
 	/*
 	 * Now fix it up
 	 */
-	spin_lock_init(&timer->lock);
 	timer->magic = TIMER_MAGIC;
 }
 
@@ -156,65 +157,113 @@ static void internal_add_timer(tvec_base_t *base, struct timer_list *timer)
 	list_add_tail(&timer->entry, vec);
 }
 
+typedef struct timer_base_s timer_base_t;
+/*
+ * Used by TIMER_INITIALIZER, we can't use per_cpu(tvec_bases)
+ * at compile time, and we need timer->base to lock the timer.
+ */
+timer_base_t __init_timer_base
+	____cacheline_aligned_in_smp = { .lock = SPIN_LOCK_UNLOCKED };
+EXPORT_SYMBOL(__init_timer_base);
+
+/***
+ * init_timer - initialize a timer.
+ * @timer: the timer to be initialized
+ *
+ * init_timer() must be done to a timer prior calling *any* of the
+ * other timer functions.
+ */
+void fastcall init_timer(struct timer_list *timer)
+{
+	timer->entry.next = NULL;
+	timer->base = &per_cpu(tvec_bases, raw_smp_processor_id()).t_base;
+	timer->magic = TIMER_MAGIC;
+}
+EXPORT_SYMBOL(init_timer);
+
+static inline void detach_timer(struct timer_list *timer,
+				int clear_pending)
+{
+	struct list_head *entry = &timer->entry;
+
+	__list_del(entry->prev, entry->next);
+	if (clear_pending)
+		entry->next = NULL;
+	entry->prev = LIST_POISON2;
+}
+
+/*
+ * We are using hashed locking: holding per_cpu(tvec_bases).t_base.lock
+ * means that all timers which are tied to this base via timer->base are
+ * locked, and the base itself is locked too.
+ *
+ * So __run_timers/migrate_timers can safely modify all timers which could
+ * be found on ->tvX lists.
+ *
+ * When the timer's base is locked, and the timer removed from list, it is
+ * possible to set timer->base = NULL and drop the lock: the timer remains
+ * locked.
+ */
+static timer_base_t *lock_timer_base(struct timer_list *timer,
+					unsigned long *flags)
+{
+	timer_base_t *base;
+
+	for (;;) {
+		base = timer->base;
+		if (likely(base != NULL)) {
+			spin_lock_irqsave(&base->lock, *flags);
+			if (likely(base == timer->base))
+				return base;
+			/* The timer has migrated to another CPU */
+			spin_unlock_irqrestore(&base->lock, *flags);
+		}
+		cpu_relax();
+	}
+}
+
 int __mod_timer(struct timer_list *timer, unsigned long expires)
 {
-	tvec_base_t *old_base, *new_base;
+	timer_base_t *base;
+	tvec_base_t *new_base;
 	unsigned long flags;
 	int ret = 0;
 
 	BUG_ON(!timer->function);
-
 	check_timer(timer);
 
-	spin_lock_irqsave(&timer->lock, flags);
+	base = lock_timer_base(timer, &flags);
+
+	if (timer_pending(timer)) {
+		detach_timer(timer, 0);
+		ret = 1;
+	}
+
 	new_base = &__get_cpu_var(tvec_bases);
-repeat:
-	old_base = timer->base;
 
-	/*
-	 * Prevent deadlocks via ordering by old_base < new_base.
-	 */
-	if (old_base && (new_base != old_base)) {
-		if (old_base < new_base) {
-			spin_lock(&new_base->lock);
-			spin_lock(&old_base->lock);
-		} else {
-			spin_lock(&old_base->lock);
-			spin_lock(&new_base->lock);
-		}
+	if (base != &new_base->t_base) {
 		/*
-		 * The timer base might have been cancelled while we were
-		 * trying to take the lock(s):
+		 * We are trying to schedule the timer on the local CPU.
+		 * However we can't change timer's base while it is running,
+		 * otherwise del_timer_sync() can't detect that the timer's
+		 * handler yet has not finished. This also guarantees that
+		 * the timer is serialized wrt itself.
 		 */
-		if (timer->base != old_base) {
-			spin_unlock(&new_base->lock);
-			spin_unlock(&old_base->lock);
-			goto repeat;
-		}
-	} else {
-		spin_lock(&new_base->lock);
-		if (timer->base != old_base) {
-			spin_unlock(&new_base->lock);
-			goto repeat;
+		if (unlikely(base->running_timer == timer)) {
+			/* The timer remains on a former base */
+			new_base = container_of(base, tvec_base_t, t_base);
+		} else {
+			/* See the comment in lock_timer_base() */
+			timer->base = NULL;
+			spin_unlock(&base->lock);
+			spin_lock(&new_base->t_base.lock);
+			timer->base = &new_base->t_base;
 		}
 	}
 
-	/*
-	 * Delete the previous timeout (if there was any), and install
-	 * the new one:
-	 */
-	if (old_base) {
-		list_del(&timer->entry);
-		ret = 1;
-	}
 	timer->expires = expires;
 	internal_add_timer(new_base, timer);
-	timer->base = new_base;
-
-	if (old_base && (new_base != old_base))
-		spin_unlock(&old_base->lock);
-	spin_unlock(&new_base->lock);
-	spin_unlock_irqrestore(&timer->lock, flags);
+	spin_unlock_irqrestore(&new_base->t_base.lock, flags);
 
 	return ret;
 }
@@ -232,15 +281,15 @@ void add_timer_on(struct timer_list *timer, int cpu)
 {
 	tvec_base_t *base = &per_cpu(tvec_bases, cpu);
 	unsigned long flags;
 
 	BUG_ON(timer_pending(timer) || !timer->function);
 
 	check_timer(timer);
 
-	spin_lock_irqsave(&base->lock, flags);
+	spin_lock_irqsave(&base->t_base.lock, flags);
+	timer->base = &base->t_base;
 	internal_add_timer(base, timer);
-	timer->base = base;
-	spin_unlock_irqrestore(&base->lock, flags);
+	spin_unlock_irqrestore(&base->t_base.lock, flags);
 }
 
 
@@ -295,27 +344,22 @@ EXPORT_SYMBOL(mod_timer);
  */
 int del_timer(struct timer_list *timer)
 {
+	timer_base_t *base;
 	unsigned long flags;
-	tvec_base_t *base;
+	int ret = 0;
 
 	check_timer(timer);
 
-repeat:
-	base = timer->base;
-	if (!base)
-		return 0;
-	spin_lock_irqsave(&base->lock, flags);
-	if (base != timer->base) {
+	if (timer_pending(timer)) {
+		base = lock_timer_base(timer, &flags);
+		if (timer_pending(timer)) {
+			detach_timer(timer, 1);
+			ret = 1;
+		}
 		spin_unlock_irqrestore(&base->lock, flags);
-		goto repeat;
 	}
-	list_del(&timer->entry);
-	/* Need to make sure that anybody who sees a NULL base also sees the list ops */
-	smp_wmb();
-	timer->base = NULL;
-	spin_unlock_irqrestore(&base->lock, flags);
 
-	return 1;
+	return ret;
 }
 
 EXPORT_SYMBOL(del_timer);
@@ -332,72 +376,39 @@ EXPORT_SYMBOL(del_timer);
  * Synchronization rules: callers must prevent restarting of the timer,
  * otherwise this function is meaningless. It must not be called from
  * interrupt contexts. The caller must not hold locks which would prevent
- * completion of the timer's handler. Upon exit the timer is not queued and
- * the handler is not running on any CPU.
+ * completion of the timer's handler. The timer's handler must not call
+ * add_timer_on(). Upon exit the timer is not queued and the handler is
+ * not running on any CPU.
  *
  * The function returns whether it has deactivated a pending timer or not.
- *
- * del_timer_sync() is slow and complicated because it copes with timer
- * handlers which re-arm the timer (periodic timers). If the timer handler
- * is known to not do this (a single shot timer) then use
- * del_singleshot_timer_sync() instead.
  */
 int del_timer_sync(struct timer_list *timer)
 {
-	tvec_base_t *base;
-	int i, ret = 0;
+	timer_base_t *base;
+	unsigned long flags;
+	int ret = -1;
 
 	check_timer(timer);
 
-del_again:
-	ret += del_timer(timer);
+	do {
+		base = lock_timer_base(timer, &flags);
 
-	for_each_online_cpu(i) {
-		base = &per_cpu(tvec_bases, i);
-		if (base->running_timer == timer) {
-			while (base->running_timer == timer) {
-				cpu_relax();
-				preempt_check_resched();
-			}
-			break;
+		if (base->running_timer == timer)
+			goto unlock;
+
+		ret = 0;
+		if (timer_pending(timer)) {
+			detach_timer(timer, 1);
+			ret = 1;
 		}
-	}
-	smp_rmb();
-	if (timer_pending(timer))
-		goto del_again;
+unlock:
+		spin_unlock_irqrestore(&base->lock, flags);
+	} while (ret < 0);
 
 	return ret;
 }
-EXPORT_SYMBOL(del_timer_sync);
 
-/***
- * del_singleshot_timer_sync - deactivate a non-recursive timer
- * @timer: the timer to be deactivated
- *
- * This function is an optimization of del_timer_sync for the case where the
- * caller can guarantee the timer does not reschedule itself in its timer
- * function.
- *
- * Synchronization rules: callers must prevent restarting of the timer,
- * otherwise this function is meaningless. It must not be called from
- * interrupt contexts. The caller must not hold locks which wold prevent
- * completion of the timer's handler. Upon exit the timer is not queued and
- * the handler is not running on any CPU.
- *
- * The function returns whether it has deactivated a pending timer or not.
- */
-int del_singleshot_timer_sync(struct timer_list *timer)
-{
-	int ret = del_timer(timer);
-
-	if (!ret) {
-		ret = del_timer_sync(timer);
-		BUG_ON(ret);
-	}
-
-	return ret;
-}
-EXPORT_SYMBOL(del_singleshot_timer_sync);
+EXPORT_SYMBOL(del_timer_sync);
 #endif
 
 static int cascade(tvec_base_t *base, tvec_t *tv, int index)
@@ -415,7 +426,7 @@ static int cascade(tvec_base_t *base, tvec_t *tv, int index)
 		struct timer_list *tmp;
 
 		tmp = list_entry(curr, struct timer_list, entry);
-		BUG_ON(tmp->base != base);
+		BUG_ON(tmp->base != &base->t_base);
 		curr = curr->next;
 		internal_add_timer(base, tmp);
 	}
@@ -437,7 +448,7 @@ static inline void __run_timers(tvec_base_t *base)
 {
 	struct timer_list *timer;
 
-	spin_lock_irq(&base->lock);
+	spin_lock_irq(&base->t_base.lock);
 	while (time_after_eq(jiffies, base->timer_jiffies)) {
 		struct list_head work_list = LIST_HEAD_INIT(work_list);
 		struct list_head *head = &work_list;
@@ -453,8 +464,7 @@ static inline void __run_timers(tvec_base_t *base)
 					cascade(base, &base->tv5, INDEX(3));
 		++base->timer_jiffies;
 		list_splice_init(base->tv1.vec + index, &work_list);
-repeat:
-		if (!list_empty(head)) {
+		while (!list_empty(head)) {
 			void (*fn)(unsigned long);
 			unsigned long data;
 
@@ -462,11 +472,9 @@ repeat:
 			fn = timer->function;
 			data = timer->data;
 
-			list_del(&timer->entry);
 			set_running_timer(base, timer);
-			smp_wmb();
-			timer->base = NULL;
-			spin_unlock_irq(&base->lock);
+			detach_timer(timer, 1);
+			spin_unlock_irq(&base->t_base.lock);
 			{
 				u32 preempt_count = preempt_count();
 				fn(data);
@@ -475,12 +483,11 @@ repeat:
 					BUG();
 				}
 			}
-			spin_lock_irq(&base->lock);
-			goto repeat;
+			spin_lock_irq(&base->t_base.lock);
 		}
 	}
 	set_running_timer(base, NULL);
-	spin_unlock_irq(&base->lock);
+	spin_unlock_irq(&base->t_base.lock);
 }
 
 #ifdef CONFIG_NO_IDLE_HZ
@@ -499,7 +506,7 @@ unsigned long next_timer_interrupt(void)
 	int i, j;
 
 	base = &__get_cpu_var(tvec_bases);
-	spin_lock(&base->lock);
+	spin_lock(&base->t_base.lock);
 	expires = base->timer_jiffies + (LONG_MAX >> 1);
 	list = 0;
 
@@ -547,7 +554,7 @@ found:
 				expires = nte->expires;
 		}
 	}
-	spin_unlock(&base->lock);
+	spin_unlock(&base->t_base.lock);
 	return expires;
 }
 #endif
@@ -1286,9 +1293,9 @@ static void __devinit init_timers_cpu(int cpu)
 {
 	int j;
 	tvec_base_t *base;
 
 	base = &per_cpu(tvec_bases, cpu);
-	spin_lock_init(&base->lock);
+	spin_lock_init(&base->t_base.lock);
 	for (j = 0; j < TVN_SIZE; j++) {
 		INIT_LIST_HEAD(base->tv5.vec + j);
 		INIT_LIST_HEAD(base->tv4.vec + j);
@@ -1302,22 +1309,16 @@ static void __devinit init_timers_cpu(int cpu)
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-static int migrate_timer_list(tvec_base_t *new_base, struct list_head *head)
+static void migrate_timer_list(tvec_base_t *new_base, struct list_head *head)
 {
 	struct timer_list *timer;
 
 	while (!list_empty(head)) {
 		timer = list_entry(head->next, struct timer_list, entry);
-		/* We're locking backwards from __mod_timer order here,
-		   beware deadlock. */
-		if (!spin_trylock(&timer->lock))
-			return 0;
-		list_del(&timer->entry);
+		detach_timer(timer, 0);
+		timer->base = &new_base->t_base;
 		internal_add_timer(new_base, timer);
-		timer->base = new_base;
-		spin_unlock(&timer->lock);
 	}
-	return 1;
 }
 
 static void __devinit migrate_timers(int cpu)
@@ -1331,39 +1332,24 @@ static void __devinit migrate_timers(int cpu)
 	new_base = &get_cpu_var(tvec_bases);
 
 	local_irq_disable();
-again:
-	/* Prevent deadlocks via ordering by old_base < new_base. */
-	if (old_base < new_base) {
-		spin_lock(&new_base->lock);
-		spin_lock(&old_base->lock);
-	} else {
-		spin_lock(&old_base->lock);
-		spin_lock(&new_base->lock);
-	}
+	spin_lock(&new_base->t_base.lock);
+	spin_lock(&old_base->t_base.lock);
 
-	if (old_base->running_timer)
+	if (old_base->t_base.running_timer)
 		BUG();
 	for (i = 0; i < TVR_SIZE; i++)
-		if (!migrate_timer_list(new_base, old_base->tv1.vec + i))
-			goto unlock_again;
-	for (i = 0; i < TVN_SIZE; i++)
-		if (!migrate_timer_list(new_base, old_base->tv2.vec + i)
-		    || !migrate_timer_list(new_base, old_base->tv3.vec + i)
-		    || !migrate_timer_list(new_base, old_base->tv4.vec + i)
-		    || !migrate_timer_list(new_base, old_base->tv5.vec + i))
-			goto unlock_again;
-	spin_unlock(&old_base->lock);
-	spin_unlock(&new_base->lock);
+		migrate_timer_list(new_base, old_base->tv1.vec + i);
+	for (i = 0; i < TVN_SIZE; i++) {
+		migrate_timer_list(new_base, old_base->tv2.vec + i);
+		migrate_timer_list(new_base, old_base->tv3.vec + i);
+		migrate_timer_list(new_base, old_base->tv4.vec + i);
+		migrate_timer_list(new_base, old_base->tv5.vec + i);
+	}
+
+	spin_unlock(&old_base->t_base.lock);
+	spin_unlock(&new_base->t_base.lock);
 	local_irq_enable();
 	put_cpu_var(tvec_bases);
-	return;
-
-unlock_again:
-	/* Avoid deadlock with __mod_timer, by backing off. */
-	spin_unlock(&old_base->lock);
-	spin_unlock(&new_base->lock);
-	cpu_relax();
-	goto again;
 }
 #endif /* CONFIG_HOTPLUG_CPU */
 