diff options
author | Oleg Nesterov <oleg@tv-sign.ru> | 2005-06-23 03:08:56 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-06-23 12:45:16 -0400 |
commit | 55c888d6d09a0df236adfaf8ccf06ff5d0646775 (patch) | |
tree | deb9434abe3cf7c9b714ccb267ef5d943a847dfe /kernel | |
parent | bdd646a44672115c986593956aa4ef105485a184 (diff) |
[PATCH] timers fixes/improvements
This patch tries to solve the following problems:
1. del_timer_sync() is racy. The timer can fire again after
del_timer_sync() has checked all cpus and before it rechecks
timer_pending().
2. It has scalability problems. All cpus are scanned to determine
if the timer is running on that cpu.
With this patch del_timer_sync is O(1) and no slower than plain
del_timer(pending_timer), unless it has to actually wait for
completion of the currently running timer.
The only restriction is that the recurring timer should not use
add_timer_on().
3. Timers are not serialized with respect to themselves.
If CPU_0 does mod_timer(jiffies+1) while the timer is currently
running on CPU_1, it is quite possible that the local interrupt on
CPU_0 will start that timer before it has finished on CPU_1.
4. Timer locking is suboptimal. __mod_timer() takes 3 locks
at once and still requires wmb() in del_timer/run_timers.
The new implementation takes 2 locks sequentially and does not
need memory barriers.
Currently ->base != NULL means that the timer is pending. In that case
->base.lock is used to lock the timer. __mod_timer also takes timer->lock
because ->base can be == NULL.
This patch uses timer->entry.next != NULL as indication that the timer is
pending. So it does __list_del(), entry->next = NULL instead of list_del()
when the timer is deleted.
The ->base field is used for hashed locking only, it is initialized
in init_timer() which sets ->base = per_cpu(tvec_bases). When the
tvec_bases.lock is locked, it means that all timers which are tied
to this base via timer->base are locked, and the base itself is locked
too.
So __run_timers/migrate_timers can safely modify all timers which could
be found on ->tvX lists (pending timers).
When the timer's base is locked, and the timer is removed from the ->entry list
(which means that __run_timers/migrate_timers can't see this timer), it is
possible to set timer->base = NULL and drop the lock: the timer remains
locked.
This patch adds lock_timer_base() helper, which waits for ->base != NULL,
locks the ->base, and checks it is still the same.
__mod_timer() schedules the timer on the local CPU and changes its base.
However, it does not lock both old and new bases at once. It locks the
timer via lock_timer_base(), deletes the timer, sets ->base = NULL, and
unlocks old base. Then __mod_timer() locks new_base, sets ->base = new_base,
and adds this timer. This simplifies the code, because AB-BA deadlock is not
possible. __mod_timer() also ensures that the timer's base is not changed
while the timer's handler is running on the old base.
__run_timers() and del_timer() do not change ->base anymore; they only clear
the pending flag.
So del_timer_sync() can test timer->base->running_timer == timer to detect
whether it is running or not.
We don't need timer_list->lock anymore, this patch kills it.
We also don't need barriers. del_timer() and __run_timers() used smp_wmb()
before clearing timer's pending flag. It was needed because __mod_timer()
did not lock old_base if the timer is not pending, so __mod_timer()->list_add()
could race with del_timer()->list_del(). With this patch these functions are
serialized through base->lock.
One problem. TIMER_INITIALIZER can't use per_cpu(tvec_bases). So this patch
adds global
struct timer_base_s {
spinlock_t lock;
struct timer_list *running_timer;
} __init_timer_base;
which is used by TIMER_INITIALIZER. The corresponding fields in tvec_t_base_s
struct are replaced by struct timer_base_s t_base.
It is indeed ugly. But this can't have scalability problems. The global
__init_timer_base.lock is used only when __mod_timer() is called for the first
time AND the timer was compile time initialized. After that the timer migrates
to the local CPU.
Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Renaud Lienhart <renaud.lienhart@free.fr>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/timer.c | 328 |
1 files changed, 157 insertions, 171 deletions
diff --git a/kernel/timer.c b/kernel/timer.c index 207aa4f0aa10..8aadc62efd65 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
@@ -57,6 +57,11 @@ static void time_interpolator_update(long delta_nsec); | |||
57 | #define TVN_MASK (TVN_SIZE - 1) | 57 | #define TVN_MASK (TVN_SIZE - 1) |
58 | #define TVR_MASK (TVR_SIZE - 1) | 58 | #define TVR_MASK (TVR_SIZE - 1) |
59 | 59 | ||
60 | struct timer_base_s { | ||
61 | spinlock_t lock; | ||
62 | struct timer_list *running_timer; | ||
63 | }; | ||
64 | |||
60 | typedef struct tvec_s { | 65 | typedef struct tvec_s { |
61 | struct list_head vec[TVN_SIZE]; | 66 | struct list_head vec[TVN_SIZE]; |
62 | } tvec_t; | 67 | } tvec_t; |
@@ -66,9 +71,8 @@ typedef struct tvec_root_s { | |||
66 | } tvec_root_t; | 71 | } tvec_root_t; |
67 | 72 | ||
68 | struct tvec_t_base_s { | 73 | struct tvec_t_base_s { |
69 | spinlock_t lock; | 74 | struct timer_base_s t_base; |
70 | unsigned long timer_jiffies; | 75 | unsigned long timer_jiffies; |
71 | struct timer_list *running_timer; | ||
72 | tvec_root_t tv1; | 76 | tvec_root_t tv1; |
73 | tvec_t tv2; | 77 | tvec_t tv2; |
74 | tvec_t tv3; | 78 | tvec_t tv3; |
@@ -77,18 +81,16 @@ struct tvec_t_base_s { | |||
77 | } ____cacheline_aligned_in_smp; | 81 | } ____cacheline_aligned_in_smp; |
78 | 82 | ||
79 | typedef struct tvec_t_base_s tvec_base_t; | 83 | typedef struct tvec_t_base_s tvec_base_t; |
84 | static DEFINE_PER_CPU(tvec_base_t, tvec_bases); | ||
80 | 85 | ||
81 | static inline void set_running_timer(tvec_base_t *base, | 86 | static inline void set_running_timer(tvec_base_t *base, |
82 | struct timer_list *timer) | 87 | struct timer_list *timer) |
83 | { | 88 | { |
84 | #ifdef CONFIG_SMP | 89 | #ifdef CONFIG_SMP |
85 | base->running_timer = timer; | 90 | base->t_base.running_timer = timer; |
86 | #endif | 91 | #endif |
87 | } | 92 | } |
88 | 93 | ||
89 | /* Fake initialization */ | ||
90 | static DEFINE_PER_CPU(tvec_base_t, tvec_bases) = { SPIN_LOCK_UNLOCKED }; | ||
91 | |||
92 | static void check_timer_failed(struct timer_list *timer) | 94 | static void check_timer_failed(struct timer_list *timer) |
93 | { | 95 | { |
94 | static int whine_count; | 96 | static int whine_count; |
@@ -103,7 +105,6 @@ static void check_timer_failed(struct timer_list *timer) | |||
103 | /* | 105 | /* |
104 | * Now fix it up | 106 | * Now fix it up |
105 | */ | 107 | */ |
106 | spin_lock_init(&timer->lock); | ||
107 | timer->magic = TIMER_MAGIC; | 108 | timer->magic = TIMER_MAGIC; |
108 | } | 109 | } |
109 | 110 | ||
@@ -156,65 +157,113 @@ static void internal_add_timer(tvec_base_t *base, struct timer_list *timer) | |||
156 | list_add_tail(&timer->entry, vec); | 157 | list_add_tail(&timer->entry, vec); |
157 | } | 158 | } |
158 | 159 | ||
160 | typedef struct timer_base_s timer_base_t; | ||
161 | /* | ||
162 | * Used by TIMER_INITIALIZER, we can't use per_cpu(tvec_bases) | ||
163 | * at compile time, and we need timer->base to lock the timer. | ||
164 | */ | ||
165 | timer_base_t __init_timer_base | ||
166 | ____cacheline_aligned_in_smp = { .lock = SPIN_LOCK_UNLOCKED }; | ||
167 | EXPORT_SYMBOL(__init_timer_base); | ||
168 | |||
169 | /*** | ||
170 | * init_timer - initialize a timer. | ||
171 | * @timer: the timer to be initialized | ||
172 | * | ||
173 | * init_timer() must be done to a timer prior calling *any* of the | ||
174 | * other timer functions. | ||
175 | */ | ||
176 | void fastcall init_timer(struct timer_list *timer) | ||
177 | { | ||
178 | timer->entry.next = NULL; | ||
179 | timer->base = &per_cpu(tvec_bases, raw_smp_processor_id()).t_base; | ||
180 | timer->magic = TIMER_MAGIC; | ||
181 | } | ||
182 | EXPORT_SYMBOL(init_timer); | ||
183 | |||
184 | static inline void detach_timer(struct timer_list *timer, | ||
185 | int clear_pending) | ||
186 | { | ||
187 | struct list_head *entry = &timer->entry; | ||
188 | |||
189 | __list_del(entry->prev, entry->next); | ||
190 | if (clear_pending) | ||
191 | entry->next = NULL; | ||
192 | entry->prev = LIST_POISON2; | ||
193 | } | ||
194 | |||
195 | /* | ||
196 | * We are using hashed locking: holding per_cpu(tvec_bases).t_base.lock | ||
197 | * means that all timers which are tied to this base via timer->base are | ||
198 | * locked, and the base itself is locked too. | ||
199 | * | ||
200 | * So __run_timers/migrate_timers can safely modify all timers which could | ||
201 | * be found on ->tvX lists. | ||
202 | * | ||
203 | * When the timer's base is locked, and the timer removed from list, it is | ||
204 | * possible to set timer->base = NULL and drop the lock: the timer remains | ||
205 | * locked. | ||
206 | */ | ||
207 | static timer_base_t *lock_timer_base(struct timer_list *timer, | ||
208 | unsigned long *flags) | ||
209 | { | ||
210 | timer_base_t *base; | ||
211 | |||
212 | for (;;) { | ||
213 | base = timer->base; | ||
214 | if (likely(base != NULL)) { | ||
215 | spin_lock_irqsave(&base->lock, *flags); | ||
216 | if (likely(base == timer->base)) | ||
217 | return base; | ||
218 | /* The timer has migrated to another CPU */ | ||
219 | spin_unlock_irqrestore(&base->lock, *flags); | ||
220 | } | ||
221 | cpu_relax(); | ||
222 | } | ||
223 | } | ||
224 | |||
159 | int __mod_timer(struct timer_list *timer, unsigned long expires) | 225 | int __mod_timer(struct timer_list *timer, unsigned long expires) |
160 | { | 226 | { |
161 | tvec_base_t *old_base, *new_base; | 227 | timer_base_t *base; |
228 | tvec_base_t *new_base; | ||
162 | unsigned long flags; | 229 | unsigned long flags; |
163 | int ret = 0; | 230 | int ret = 0; |
164 | 231 | ||
165 | BUG_ON(!timer->function); | 232 | BUG_ON(!timer->function); |
166 | |||
167 | check_timer(timer); | 233 | check_timer(timer); |
168 | 234 | ||
169 | spin_lock_irqsave(&timer->lock, flags); | 235 | base = lock_timer_base(timer, &flags); |
236 | |||
237 | if (timer_pending(timer)) { | ||
238 | detach_timer(timer, 0); | ||
239 | ret = 1; | ||
240 | } | ||
241 | |||
170 | new_base = &__get_cpu_var(tvec_bases); | 242 | new_base = &__get_cpu_var(tvec_bases); |
171 | repeat: | ||
172 | old_base = timer->base; | ||
173 | 243 | ||
174 | /* | 244 | if (base != &new_base->t_base) { |
175 | * Prevent deadlocks via ordering by old_base < new_base. | ||
176 | */ | ||
177 | if (old_base && (new_base != old_base)) { | ||
178 | if (old_base < new_base) { | ||
179 | spin_lock(&new_base->lock); | ||
180 | spin_lock(&old_base->lock); | ||
181 | } else { | ||
182 | spin_lock(&old_base->lock); | ||
183 | spin_lock(&new_base->lock); | ||
184 | } | ||
185 | /* | 245 | /* |
186 | * The timer base might have been cancelled while we were | 246 | * We are trying to schedule the timer on the local CPU. |
187 | * trying to take the lock(s): | 247 | * However we can't change timer's base while it is running, |
248 | * otherwise del_timer_sync() can't detect that the timer's | ||
249 | * handler yet has not finished. This also guarantees that | ||
250 | * the timer is serialized wrt itself. | ||
188 | */ | 251 | */ |
189 | if (timer->base != old_base) { | 252 | if (unlikely(base->running_timer == timer)) { |
190 | spin_unlock(&new_base->lock); | 253 | /* The timer remains on a former base */ |
191 | spin_unlock(&old_base->lock); | 254 | new_base = container_of(base, tvec_base_t, t_base); |
192 | goto repeat; | 255 | } else { |
193 | } | 256 | /* See the comment in lock_timer_base() */ |
194 | } else { | 257 | timer->base = NULL; |
195 | spin_lock(&new_base->lock); | 258 | spin_unlock(&base->lock); |
196 | if (timer->base != old_base) { | 259 | spin_lock(&new_base->t_base.lock); |
197 | spin_unlock(&new_base->lock); | 260 | timer->base = &new_base->t_base; |
198 | goto repeat; | ||
199 | } | 261 | } |
200 | } | 262 | } |
201 | 263 | ||
202 | /* | ||
203 | * Delete the previous timeout (if there was any), and install | ||
204 | * the new one: | ||
205 | */ | ||
206 | if (old_base) { | ||
207 | list_del(&timer->entry); | ||
208 | ret = 1; | ||
209 | } | ||
210 | timer->expires = expires; | 264 | timer->expires = expires; |
211 | internal_add_timer(new_base, timer); | 265 | internal_add_timer(new_base, timer); |
212 | timer->base = new_base; | 266 | spin_unlock_irqrestore(&new_base->t_base.lock, flags); |
213 | |||
214 | if (old_base && (new_base != old_base)) | ||
215 | spin_unlock(&old_base->lock); | ||
216 | spin_unlock(&new_base->lock); | ||
217 | spin_unlock_irqrestore(&timer->lock, flags); | ||
218 | 267 | ||
219 | return ret; | 268 | return ret; |
220 | } | 269 | } |
@@ -232,15 +281,15 @@ void add_timer_on(struct timer_list *timer, int cpu) | |||
232 | { | 281 | { |
233 | tvec_base_t *base = &per_cpu(tvec_bases, cpu); | 282 | tvec_base_t *base = &per_cpu(tvec_bases, cpu); |
234 | unsigned long flags; | 283 | unsigned long flags; |
235 | 284 | ||
236 | BUG_ON(timer_pending(timer) || !timer->function); | 285 | BUG_ON(timer_pending(timer) || !timer->function); |
237 | 286 | ||
238 | check_timer(timer); | 287 | check_timer(timer); |
239 | 288 | ||
240 | spin_lock_irqsave(&base->lock, flags); | 289 | spin_lock_irqsave(&base->t_base.lock, flags); |
290 | timer->base = &base->t_base; | ||
241 | internal_add_timer(base, timer); | 291 | internal_add_timer(base, timer); |
242 | timer->base = base; | 292 | spin_unlock_irqrestore(&base->t_base.lock, flags); |
243 | spin_unlock_irqrestore(&base->lock, flags); | ||
244 | } | 293 | } |
245 | 294 | ||
246 | 295 | ||
@@ -295,27 +344,22 @@ EXPORT_SYMBOL(mod_timer); | |||
295 | */ | 344 | */ |
296 | int del_timer(struct timer_list *timer) | 345 | int del_timer(struct timer_list *timer) |
297 | { | 346 | { |
347 | timer_base_t *base; | ||
298 | unsigned long flags; | 348 | unsigned long flags; |
299 | tvec_base_t *base; | 349 | int ret = 0; |
300 | 350 | ||
301 | check_timer(timer); | 351 | check_timer(timer); |
302 | 352 | ||
303 | repeat: | 353 | if (timer_pending(timer)) { |
304 | base = timer->base; | 354 | base = lock_timer_base(timer, &flags); |
305 | if (!base) | 355 | if (timer_pending(timer)) { |
306 | return 0; | 356 | detach_timer(timer, 1); |
307 | spin_lock_irqsave(&base->lock, flags); | 357 | ret = 1; |
308 | if (base != timer->base) { | 358 | } |
309 | spin_unlock_irqrestore(&base->lock, flags); | 359 | spin_unlock_irqrestore(&base->lock, flags); |
310 | goto repeat; | ||
311 | } | 360 | } |
312 | list_del(&timer->entry); | ||
313 | /* Need to make sure that anybody who sees a NULL base also sees the list ops */ | ||
314 | smp_wmb(); | ||
315 | timer->base = NULL; | ||
316 | spin_unlock_irqrestore(&base->lock, flags); | ||
317 | 361 | ||
318 | return 1; | 362 | return ret; |
319 | } | 363 | } |
320 | 364 | ||
321 | EXPORT_SYMBOL(del_timer); | 365 | EXPORT_SYMBOL(del_timer); |
@@ -332,72 +376,39 @@ EXPORT_SYMBOL(del_timer); | |||
332 | * Synchronization rules: callers must prevent restarting of the timer, | 376 | * Synchronization rules: callers must prevent restarting of the timer, |
333 | * otherwise this function is meaningless. It must not be called from | 377 | * otherwise this function is meaningless. It must not be called from |
334 | * interrupt contexts. The caller must not hold locks which would prevent | 378 | * interrupt contexts. The caller must not hold locks which would prevent |
335 | * completion of the timer's handler. Upon exit the timer is not queued and | 379 | * completion of the timer's handler. The timer's handler must not call |
336 | * the handler is not running on any CPU. | 380 | * add_timer_on(). Upon exit the timer is not queued and the handler is |
381 | * not running on any CPU. | ||
337 | * | 382 | * |
338 | * The function returns whether it has deactivated a pending timer or not. | 383 | * The function returns whether it has deactivated a pending timer or not. |
339 | * | ||
340 | * del_timer_sync() is slow and complicated because it copes with timer | ||
341 | * handlers which re-arm the timer (periodic timers). If the timer handler | ||
342 | * is known to not do this (a single shot timer) then use | ||
343 | * del_singleshot_timer_sync() instead. | ||
344 | */ | 384 | */ |
345 | int del_timer_sync(struct timer_list *timer) | 385 | int del_timer_sync(struct timer_list *timer) |
346 | { | 386 | { |
347 | tvec_base_t *base; | 387 | timer_base_t *base; |
348 | int i, ret = 0; | 388 | unsigned long flags; |
389 | int ret = -1; | ||
349 | 390 | ||
350 | check_timer(timer); | 391 | check_timer(timer); |
351 | 392 | ||
352 | del_again: | 393 | do { |
353 | ret += del_timer(timer); | 394 | base = lock_timer_base(timer, &flags); |
354 | 395 | ||
355 | for_each_online_cpu(i) { | 396 | if (base->running_timer == timer) |
356 | base = &per_cpu(tvec_bases, i); | 397 | goto unlock; |
357 | if (base->running_timer == timer) { | 398 | |
358 | while (base->running_timer == timer) { | 399 | ret = 0; |
359 | cpu_relax(); | 400 | if (timer_pending(timer)) { |
360 | preempt_check_resched(); | 401 | detach_timer(timer, 1); |
361 | } | 402 | ret = 1; |
362 | break; | ||
363 | } | 403 | } |
364 | } | 404 | unlock: |
365 | smp_rmb(); | 405 | spin_unlock_irqrestore(&base->lock, flags); |
366 | if (timer_pending(timer)) | 406 | } while (ret < 0); |
367 | goto del_again; | ||
368 | 407 | ||
369 | return ret; | 408 | return ret; |
370 | } | 409 | } |
371 | EXPORT_SYMBOL(del_timer_sync); | ||
372 | 410 | ||
373 | /*** | 411 | EXPORT_SYMBOL(del_timer_sync); |
374 | * del_singleshot_timer_sync - deactivate a non-recursive timer | ||
375 | * @timer: the timer to be deactivated | ||
376 | * | ||
377 | * This function is an optimization of del_timer_sync for the case where the | ||
378 | * caller can guarantee the timer does not reschedule itself in its timer | ||
379 | * function. | ||
380 | * | ||
381 | * Synchronization rules: callers must prevent restarting of the timer, | ||
382 | * otherwise this function is meaningless. It must not be called from | ||
383 | * interrupt contexts. The caller must not hold locks which wold prevent | ||
384 | * completion of the timer's handler. Upon exit the timer is not queued and | ||
385 | * the handler is not running on any CPU. | ||
386 | * | ||
387 | * The function returns whether it has deactivated a pending timer or not. | ||
388 | */ | ||
389 | int del_singleshot_timer_sync(struct timer_list *timer) | ||
390 | { | ||
391 | int ret = del_timer(timer); | ||
392 | |||
393 | if (!ret) { | ||
394 | ret = del_timer_sync(timer); | ||
395 | BUG_ON(ret); | ||
396 | } | ||
397 | |||
398 | return ret; | ||
399 | } | ||
400 | EXPORT_SYMBOL(del_singleshot_timer_sync); | ||
401 | #endif | 412 | #endif |
402 | 413 | ||
403 | static int cascade(tvec_base_t *base, tvec_t *tv, int index) | 414 | static int cascade(tvec_base_t *base, tvec_t *tv, int index) |
@@ -415,7 +426,7 @@ static int cascade(tvec_base_t *base, tvec_t *tv, int index) | |||
415 | struct timer_list *tmp; | 426 | struct timer_list *tmp; |
416 | 427 | ||
417 | tmp = list_entry(curr, struct timer_list, entry); | 428 | tmp = list_entry(curr, struct timer_list, entry); |
418 | BUG_ON(tmp->base != base); | 429 | BUG_ON(tmp->base != &base->t_base); |
419 | curr = curr->next; | 430 | curr = curr->next; |
420 | internal_add_timer(base, tmp); | 431 | internal_add_timer(base, tmp); |
421 | } | 432 | } |
@@ -437,7 +448,7 @@ static inline void __run_timers(tvec_base_t *base) | |||
437 | { | 448 | { |
438 | struct timer_list *timer; | 449 | struct timer_list *timer; |
439 | 450 | ||
440 | spin_lock_irq(&base->lock); | 451 | spin_lock_irq(&base->t_base.lock); |
441 | while (time_after_eq(jiffies, base->timer_jiffies)) { | 452 | while (time_after_eq(jiffies, base->timer_jiffies)) { |
442 | struct list_head work_list = LIST_HEAD_INIT(work_list); | 453 | struct list_head work_list = LIST_HEAD_INIT(work_list); |
443 | struct list_head *head = &work_list; | 454 | struct list_head *head = &work_list; |
@@ -453,8 +464,7 @@ static inline void __run_timers(tvec_base_t *base) | |||
453 | cascade(base, &base->tv5, INDEX(3)); | 464 | cascade(base, &base->tv5, INDEX(3)); |
454 | ++base->timer_jiffies; | 465 | ++base->timer_jiffies; |
455 | list_splice_init(base->tv1.vec + index, &work_list); | 466 | list_splice_init(base->tv1.vec + index, &work_list); |
456 | repeat: | 467 | while (!list_empty(head)) { |
457 | if (!list_empty(head)) { | ||
458 | void (*fn)(unsigned long); | 468 | void (*fn)(unsigned long); |
459 | unsigned long data; | 469 | unsigned long data; |
460 | 470 | ||
@@ -462,11 +472,9 @@ repeat: | |||
462 | fn = timer->function; | 472 | fn = timer->function; |
463 | data = timer->data; | 473 | data = timer->data; |
464 | 474 | ||
465 | list_del(&timer->entry); | ||
466 | set_running_timer(base, timer); | 475 | set_running_timer(base, timer); |
467 | smp_wmb(); | 476 | detach_timer(timer, 1); |
468 | timer->base = NULL; | 477 | spin_unlock_irq(&base->t_base.lock); |
469 | spin_unlock_irq(&base->lock); | ||
470 | { | 478 | { |
471 | u32 preempt_count = preempt_count(); | 479 | u32 preempt_count = preempt_count(); |
472 | fn(data); | 480 | fn(data); |
@@ -475,12 +483,11 @@ repeat: | |||
475 | BUG(); | 483 | BUG(); |
476 | } | 484 | } |
477 | } | 485 | } |
478 | spin_lock_irq(&base->lock); | 486 | spin_lock_irq(&base->t_base.lock); |
479 | goto repeat; | ||
480 | } | 487 | } |
481 | } | 488 | } |
482 | set_running_timer(base, NULL); | 489 | set_running_timer(base, NULL); |
483 | spin_unlock_irq(&base->lock); | 490 | spin_unlock_irq(&base->t_base.lock); |
484 | } | 491 | } |
485 | 492 | ||
486 | #ifdef CONFIG_NO_IDLE_HZ | 493 | #ifdef CONFIG_NO_IDLE_HZ |
@@ -499,7 +506,7 @@ unsigned long next_timer_interrupt(void) | |||
499 | int i, j; | 506 | int i, j; |
500 | 507 | ||
501 | base = &__get_cpu_var(tvec_bases); | 508 | base = &__get_cpu_var(tvec_bases); |
502 | spin_lock(&base->lock); | 509 | spin_lock(&base->t_base.lock); |
503 | expires = base->timer_jiffies + (LONG_MAX >> 1); | 510 | expires = base->timer_jiffies + (LONG_MAX >> 1); |
504 | list = 0; | 511 | list = 0; |
505 | 512 | ||
@@ -547,7 +554,7 @@ found: | |||
547 | expires = nte->expires; | 554 | expires = nte->expires; |
548 | } | 555 | } |
549 | } | 556 | } |
550 | spin_unlock(&base->lock); | 557 | spin_unlock(&base->t_base.lock); |
551 | return expires; | 558 | return expires; |
552 | } | 559 | } |
553 | #endif | 560 | #endif |
@@ -1286,9 +1293,9 @@ static void __devinit init_timers_cpu(int cpu) | |||
1286 | { | 1293 | { |
1287 | int j; | 1294 | int j; |
1288 | tvec_base_t *base; | 1295 | tvec_base_t *base; |
1289 | 1296 | ||
1290 | base = &per_cpu(tvec_bases, cpu); | 1297 | base = &per_cpu(tvec_bases, cpu); |
1291 | spin_lock_init(&base->lock); | 1298 | spin_lock_init(&base->t_base.lock); |
1292 | for (j = 0; j < TVN_SIZE; j++) { | 1299 | for (j = 0; j < TVN_SIZE; j++) { |
1293 | INIT_LIST_HEAD(base->tv5.vec + j); | 1300 | INIT_LIST_HEAD(base->tv5.vec + j); |
1294 | INIT_LIST_HEAD(base->tv4.vec + j); | 1301 | INIT_LIST_HEAD(base->tv4.vec + j); |
@@ -1302,22 +1309,16 @@ static void __devinit init_timers_cpu(int cpu) | |||
1302 | } | 1309 | } |
1303 | 1310 | ||
1304 | #ifdef CONFIG_HOTPLUG_CPU | 1311 | #ifdef CONFIG_HOTPLUG_CPU |
1305 | static int migrate_timer_list(tvec_base_t *new_base, struct list_head *head) | 1312 | static void migrate_timer_list(tvec_base_t *new_base, struct list_head *head) |
1306 | { | 1313 | { |
1307 | struct timer_list *timer; | 1314 | struct timer_list *timer; |
1308 | 1315 | ||
1309 | while (!list_empty(head)) { | 1316 | while (!list_empty(head)) { |
1310 | timer = list_entry(head->next, struct timer_list, entry); | 1317 | timer = list_entry(head->next, struct timer_list, entry); |
1311 | /* We're locking backwards from __mod_timer order here, | 1318 | detach_timer(timer, 0); |
1312 | beware deadlock. */ | 1319 | timer->base = &new_base->t_base; |
1313 | if (!spin_trylock(&timer->lock)) | ||
1314 | return 0; | ||
1315 | list_del(&timer->entry); | ||
1316 | internal_add_timer(new_base, timer); | 1320 | internal_add_timer(new_base, timer); |
1317 | timer->base = new_base; | ||
1318 | spin_unlock(&timer->lock); | ||
1319 | } | 1321 | } |
1320 | return 1; | ||
1321 | } | 1322 | } |
1322 | 1323 | ||
1323 | static void __devinit migrate_timers(int cpu) | 1324 | static void __devinit migrate_timers(int cpu) |
@@ -1331,39 +1332,24 @@ static void __devinit migrate_timers(int cpu) | |||
1331 | new_base = &get_cpu_var(tvec_bases); | 1332 | new_base = &get_cpu_var(tvec_bases); |
1332 | 1333 | ||
1333 | local_irq_disable(); | 1334 | local_irq_disable(); |
1334 | again: | 1335 | spin_lock(&new_base->t_base.lock); |
1335 | /* Prevent deadlocks via ordering by old_base < new_base. */ | 1336 | spin_lock(&old_base->t_base.lock); |
1336 | if (old_base < new_base) { | ||
1337 | spin_lock(&new_base->lock); | ||
1338 | spin_lock(&old_base->lock); | ||
1339 | } else { | ||
1340 | spin_lock(&old_base->lock); | ||
1341 | spin_lock(&new_base->lock); | ||
1342 | } | ||
1343 | 1337 | ||
1344 | if (old_base->running_timer) | 1338 | if (old_base->t_base.running_timer) |
1345 | BUG(); | 1339 | BUG(); |
1346 | for (i = 0; i < TVR_SIZE; i++) | 1340 | for (i = 0; i < TVR_SIZE; i++) |
1347 | if (!migrate_timer_list(new_base, old_base->tv1.vec + i)) | 1341 | migrate_timer_list(new_base, old_base->tv1.vec + i); |
1348 | goto unlock_again; | 1342 | for (i = 0; i < TVN_SIZE; i++) { |
1349 | for (i = 0; i < TVN_SIZE; i++) | 1343 | migrate_timer_list(new_base, old_base->tv2.vec + i); |
1350 | if (!migrate_timer_list(new_base, old_base->tv2.vec + i) | 1344 | migrate_timer_list(new_base, old_base->tv3.vec + i); |
1351 | || !migrate_timer_list(new_base, old_base->tv3.vec + i) | 1345 | migrate_timer_list(new_base, old_base->tv4.vec + i); |
1352 | || !migrate_timer_list(new_base, old_base->tv4.vec + i) | 1346 | migrate_timer_list(new_base, old_base->tv5.vec + i); |
1353 | || !migrate_timer_list(new_base, old_base->tv5.vec + i)) | 1347 | } |
1354 | goto unlock_again; | 1348 | |
1355 | spin_unlock(&old_base->lock); | 1349 | spin_unlock(&old_base->t_base.lock); |
1356 | spin_unlock(&new_base->lock); | 1350 | spin_unlock(&new_base->t_base.lock); |
1357 | local_irq_enable(); | 1351 | local_irq_enable(); |
1358 | put_cpu_var(tvec_bases); | 1352 | put_cpu_var(tvec_bases); |
1359 | return; | ||
1360 | |||
1361 | unlock_again: | ||
1362 | /* Avoid deadlock with __mod_timer, by backing off. */ | ||
1363 | spin_unlock(&old_base->lock); | ||
1364 | spin_unlock(&new_base->lock); | ||
1365 | cpu_relax(); | ||
1366 | goto again; | ||
1367 | } | 1353 | } |
1368 | #endif /* CONFIG_HOTPLUG_CPU */ | 1354 | #endif /* CONFIG_HOTPLUG_CPU */ |
1369 | 1355 | ||