diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2008-09-30 11:39:18 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-09-30 11:39:28 -0400 |
commit | cf4b0b2c9520728e170f7a3061e24dbae0b56ed4 (patch) | |
tree | 84b014876a95340c66755bbbf6e943478bc4419a | |
parent | 94aca1dac6f6d21f4b07e4864baf7768cabcc6e7 (diff) | |
parent | ccc7dadf736639da86f3e0c86832c11a66fc8221 (diff) |
Merge branch 'timers-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'timers-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  hrtimer: prevent migration of per CPU hrtimers
  hrtimer: mark migration state
  hrtimer: fix migration of CB_IRQSAFE_NO_SOFTIRQ hrtimers
  hrtimer: migrate pending list on cpu offline
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Tested-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
-rw-r--r-- | include/linux/hrtimer.h | 18 | ||||
-rw-r--r-- | kernel/hrtimer.c | 95 | ||||
-rw-r--r-- | kernel/sched.c | 4 | ||||
-rw-r--r-- | kernel/time/tick-sched.c | 2 | ||||
-rw-r--r-- | kernel/trace/trace_sysprof.c | 2 |
5 files changed, 103 insertions, 18 deletions
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 6d93dce61cbb..2f245fe63bda 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h | |||
@@ -47,14 +47,22 @@ enum hrtimer_restart { | |||
47 | * HRTIMER_CB_IRQSAFE: Callback may run in hardirq context | 47 | * HRTIMER_CB_IRQSAFE: Callback may run in hardirq context |
48 | * HRTIMER_CB_IRQSAFE_NO_RESTART: Callback may run in hardirq context and | 48 | * HRTIMER_CB_IRQSAFE_NO_RESTART: Callback may run in hardirq context and |
49 | * does not restart the timer | 49 | * does not restart the timer |
50 | * HRTIMER_CB_IRQSAFE_NO_SOFTIRQ: Callback must run in hardirq context | 50 | * HRTIMER_CB_IRQSAFE_PERCPU: Callback must run in hardirq context |
51 | * Special mode for tick emultation | 51 | * Special mode for tick emulation and |
52 | * scheduler timer. Such timers are per | ||
53 | * cpu and not allowed to be migrated on | ||
54 | * cpu unplug. | ||
55 | * HRTIMER_CB_IRQSAFE_UNLOCKED: Callback should run in hardirq context | ||
56 | * with timer->base lock unlocked | ||
57 | * used for timers which call wakeup to | ||
58 | * avoid lock order problems with rq->lock | ||
52 | */ | 59 | */ |
53 | enum hrtimer_cb_mode { | 60 | enum hrtimer_cb_mode { |
54 | HRTIMER_CB_SOFTIRQ, | 61 | HRTIMER_CB_SOFTIRQ, |
55 | HRTIMER_CB_IRQSAFE, | 62 | HRTIMER_CB_IRQSAFE, |
56 | HRTIMER_CB_IRQSAFE_NO_RESTART, | 63 | HRTIMER_CB_IRQSAFE_NO_RESTART, |
57 | HRTIMER_CB_IRQSAFE_NO_SOFTIRQ, | 64 | HRTIMER_CB_IRQSAFE_PERCPU, |
65 | HRTIMER_CB_IRQSAFE_UNLOCKED, | ||
58 | }; | 66 | }; |
59 | 67 | ||
60 | /* | 68 | /* |
@@ -67,9 +75,10 @@ enum hrtimer_cb_mode { | |||
67 | * 0x02 callback function running | 75 | * 0x02 callback function running |
68 | * 0x04 callback pending (high resolution mode) | 76 | * 0x04 callback pending (high resolution mode) |
69 | * | 77 | * |
70 | * Special case: | 78 | * Special cases: |
71 | * 0x03 callback function running and enqueued | 79 | * 0x03 callback function running and enqueued |
72 | * (was requeued on another CPU) | 80 | * (was requeued on another CPU) |
81 | * 0x09 timer was migrated on CPU hotunplug | ||
73 | * The "callback function running and enqueued" status is only possible on | 82 | * The "callback function running and enqueued" status is only possible on |
74 | * SMP. It happens for example when a posix timer expired and the callback | 83 | * SMP. It happens for example when a posix timer expired and the callback |
75 | * queued a signal. Between dropping the lock which protects the posix timer | 84 | * queued a signal. Between dropping the lock which protects the posix timer |
@@ -87,6 +96,7 @@ enum hrtimer_cb_mode { | |||
87 | #define HRTIMER_STATE_ENQUEUED 0x01 | 96 | #define HRTIMER_STATE_ENQUEUED 0x01 |
88 | #define HRTIMER_STATE_CALLBACK 0x02 | 97 | #define HRTIMER_STATE_CALLBACK 0x02 |
89 | #define HRTIMER_STATE_PENDING 0x04 | 98 | #define HRTIMER_STATE_PENDING 0x04 |
99 | #define HRTIMER_STATE_MIGRATE 0x08 | ||
90 | 100 | ||
91 | /** | 101 | /** |
92 | * struct hrtimer - the basic hrtimer structure | 102 | * struct hrtimer - the basic hrtimer structure |
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index b8e4dce80a74..cdec83e722fa 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c | |||
@@ -672,13 +672,14 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, | |||
672 | */ | 672 | */ |
673 | BUG_ON(timer->function(timer) != HRTIMER_NORESTART); | 673 | BUG_ON(timer->function(timer) != HRTIMER_NORESTART); |
674 | return 1; | 674 | return 1; |
675 | case HRTIMER_CB_IRQSAFE_NO_SOFTIRQ: | 675 | case HRTIMER_CB_IRQSAFE_PERCPU: |
676 | case HRTIMER_CB_IRQSAFE_UNLOCKED: | ||
676 | /* | 677 | /* |
677 | * This is solely for the sched tick emulation with | 678 | * This is solely for the sched tick emulation with |
678 | * dynamic tick support to ensure that we do not | 679 | * dynamic tick support to ensure that we do not |
679 | * restart the tick right on the edge and end up with | 680 | * restart the tick right on the edge and end up with |
680 | * the tick timer in the softirq ! The calling site | 681 | * the tick timer in the softirq ! The calling site |
681 | * takes care of this. | 682 | * takes care of this. Also used for hrtimer sleeper ! |
682 | */ | 683 | */ |
683 | debug_hrtimer_deactivate(timer); | 684 | debug_hrtimer_deactivate(timer); |
684 | return 1; | 685 | return 1; |
@@ -1245,7 +1246,8 @@ static void __run_hrtimer(struct hrtimer *timer) | |||
1245 | timer_stats_account_hrtimer(timer); | 1246 | timer_stats_account_hrtimer(timer); |
1246 | 1247 | ||
1247 | fn = timer->function; | 1248 | fn = timer->function; |
1248 | if (timer->cb_mode == HRTIMER_CB_IRQSAFE_NO_SOFTIRQ) { | 1249 | if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU || |
1250 | timer->cb_mode == HRTIMER_CB_IRQSAFE_UNLOCKED) { | ||
1249 | /* | 1251 | /* |
1250 | * Used for scheduler timers, avoid lock inversion with | 1252 | * Used for scheduler timers, avoid lock inversion with |
1251 | * rq->lock and tasklist_lock. | 1253 | * rq->lock and tasklist_lock. |
@@ -1452,7 +1454,7 @@ void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task) | |||
1452 | sl->timer.function = hrtimer_wakeup; | 1454 | sl->timer.function = hrtimer_wakeup; |
1453 | sl->task = task; | 1455 | sl->task = task; |
1454 | #ifdef CONFIG_HIGH_RES_TIMERS | 1456 | #ifdef CONFIG_HIGH_RES_TIMERS |
1455 | sl->timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ; | 1457 | sl->timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED; |
1456 | #endif | 1458 | #endif |
1457 | } | 1459 | } |
1458 | 1460 | ||
@@ -1591,29 +1593,95 @@ static void __cpuinit init_hrtimers_cpu(int cpu) | |||
1591 | 1593 | ||
1592 | #ifdef CONFIG_HOTPLUG_CPU | 1594 | #ifdef CONFIG_HOTPLUG_CPU |
1593 | 1595 | ||
1594 | static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, | 1596 | static int migrate_hrtimer_list(struct hrtimer_clock_base *old_base, |
1595 | struct hrtimer_clock_base *new_base) | 1597 | struct hrtimer_clock_base *new_base, int dcpu) |
1596 | { | 1598 | { |
1597 | struct hrtimer *timer; | 1599 | struct hrtimer *timer; |
1598 | struct rb_node *node; | 1600 | struct rb_node *node; |
1601 | int raise = 0; | ||
1599 | 1602 | ||
1600 | while ((node = rb_first(&old_base->active))) { | 1603 | while ((node = rb_first(&old_base->active))) { |
1601 | timer = rb_entry(node, struct hrtimer, node); | 1604 | timer = rb_entry(node, struct hrtimer, node); |
1602 | BUG_ON(hrtimer_callback_running(timer)); | 1605 | BUG_ON(hrtimer_callback_running(timer)); |
1603 | debug_hrtimer_deactivate(timer); | 1606 | debug_hrtimer_deactivate(timer); |
1604 | __remove_hrtimer(timer, old_base, HRTIMER_STATE_INACTIVE, 0); | 1607 | |
1608 | /* | ||
1609 | * Should not happen. Per CPU timers should be | ||
1610 | * canceled _before_ the migration code is called | ||
1611 | */ | ||
1612 | if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU) { | ||
1613 | __remove_hrtimer(timer, old_base, | ||
1614 | HRTIMER_STATE_INACTIVE, 0); | ||
1615 | WARN(1, "hrtimer (%p %p)active but cpu %d dead\n", | ||
1616 | timer, timer->function, dcpu); | ||
1617 | continue; | ||
1618 | } | ||
1619 | |||
1620 | /* | ||
1621 | * Mark it as STATE_MIGRATE not INACTIVE otherwise the | ||
1622 | * timer could be seen as !active and just vanish away | ||
1623 | * under us on another CPU | ||
1624 | */ | ||
1625 | __remove_hrtimer(timer, old_base, HRTIMER_STATE_MIGRATE, 0); | ||
1605 | timer->base = new_base; | 1626 | timer->base = new_base; |
1606 | /* | 1627 | /* |
1607 | * Enqueue the timer. Allow reprogramming of the event device | 1628 | * Enqueue the timer. Allow reprogramming of the event device |
1608 | */ | 1629 | */ |
1609 | enqueue_hrtimer(timer, new_base, 1); | 1630 | enqueue_hrtimer(timer, new_base, 1); |
1631 | |||
1632 | #ifdef CONFIG_HIGH_RES_TIMERS | ||
1633 | /* | ||
1634 | * Happens with high res enabled when the timer was | ||
1635 | * already expired and the callback mode is | ||
1636 | * HRTIMER_CB_IRQSAFE_UNLOCKED (hrtimer_sleeper). The | ||
1637 | * enqueue code does not move them to the soft irq | ||
1638 | * pending list for performance/latency reasons, but | ||
1639 | * in the migration state, we need to do that | ||
1640 | * otherwise we end up with a stale timer. | ||
1641 | */ | ||
1642 | if (timer->state == HRTIMER_STATE_MIGRATE) { | ||
1643 | timer->state = HRTIMER_STATE_PENDING; | ||
1644 | list_add_tail(&timer->cb_entry, | ||
1645 | &new_base->cpu_base->cb_pending); | ||
1646 | raise = 1; | ||
1647 | } | ||
1648 | #endif | ||
1649 | /* Clear the migration state bit */ | ||
1650 | timer->state &= ~HRTIMER_STATE_MIGRATE; | ||
1651 | } | ||
1652 | return raise; | ||
1653 | } | ||
1654 | |||
1655 | #ifdef CONFIG_HIGH_RES_TIMERS | ||
1656 | static int migrate_hrtimer_pending(struct hrtimer_cpu_base *old_base, | ||
1657 | struct hrtimer_cpu_base *new_base) | ||
1658 | { | ||
1659 | struct hrtimer *timer; | ||
1660 | int raise = 0; | ||
1661 | |||
1662 | while (!list_empty(&old_base->cb_pending)) { | ||
1663 | timer = list_entry(old_base->cb_pending.next, | ||
1664 | struct hrtimer, cb_entry); | ||
1665 | |||
1666 | __remove_hrtimer(timer, timer->base, HRTIMER_STATE_PENDING, 0); | ||
1667 | timer->base = &new_base->clock_base[timer->base->index]; | ||
1668 | list_add_tail(&timer->cb_entry, &new_base->cb_pending); | ||
1669 | raise = 1; | ||
1610 | } | 1670 | } |
1671 | return raise; | ||
1672 | } | ||
1673 | #else | ||
1674 | static int migrate_hrtimer_pending(struct hrtimer_cpu_base *old_base, | ||
1675 | struct hrtimer_cpu_base *new_base) | ||
1676 | { | ||
1677 | return 0; | ||
1611 | } | 1678 | } |
1679 | #endif | ||
1612 | 1680 | ||
1613 | static void migrate_hrtimers(int cpu) | 1681 | static void migrate_hrtimers(int cpu) |
1614 | { | 1682 | { |
1615 | struct hrtimer_cpu_base *old_base, *new_base; | 1683 | struct hrtimer_cpu_base *old_base, *new_base; |
1616 | int i; | 1684 | int i, raise = 0; |
1617 | 1685 | ||
1618 | BUG_ON(cpu_online(cpu)); | 1686 | BUG_ON(cpu_online(cpu)); |
1619 | old_base = &per_cpu(hrtimer_bases, cpu); | 1687 | old_base = &per_cpu(hrtimer_bases, cpu); |
@@ -1626,14 +1694,21 @@ static void migrate_hrtimers(int cpu) | |||
1626 | spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); | 1694 | spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); |
1627 | 1695 | ||
1628 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { | 1696 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { |
1629 | migrate_hrtimer_list(&old_base->clock_base[i], | 1697 | if (migrate_hrtimer_list(&old_base->clock_base[i], |
1630 | &new_base->clock_base[i]); | 1698 | &new_base->clock_base[i], cpu)) |
1699 | raise = 1; | ||
1631 | } | 1700 | } |
1632 | 1701 | ||
1702 | if (migrate_hrtimer_pending(old_base, new_base)) | ||
1703 | raise = 1; | ||
1704 | |||
1633 | spin_unlock(&old_base->lock); | 1705 | spin_unlock(&old_base->lock); |
1634 | spin_unlock(&new_base->lock); | 1706 | spin_unlock(&new_base->lock); |
1635 | local_irq_enable(); | 1707 | local_irq_enable(); |
1636 | put_cpu_var(hrtimer_bases); | 1708 | put_cpu_var(hrtimer_bases); |
1709 | |||
1710 | if (raise) | ||
1711 | hrtimer_raise_softirq(); | ||
1637 | } | 1712 | } |
1638 | #endif /* CONFIG_HOTPLUG_CPU */ | 1713 | #endif /* CONFIG_HOTPLUG_CPU */ |
1639 | 1714 | ||
diff --git a/kernel/sched.c b/kernel/sched.c index 13dd2db9fb2d..ad1962dc0aa2 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -201,7 +201,7 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime) | |||
201 | hrtimer_init(&rt_b->rt_period_timer, | 201 | hrtimer_init(&rt_b->rt_period_timer, |
202 | CLOCK_MONOTONIC, HRTIMER_MODE_REL); | 202 | CLOCK_MONOTONIC, HRTIMER_MODE_REL); |
203 | rt_b->rt_period_timer.function = sched_rt_period_timer; | 203 | rt_b->rt_period_timer.function = sched_rt_period_timer; |
204 | rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ; | 204 | rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED; |
205 | } | 205 | } |
206 | 206 | ||
207 | static void start_rt_bandwidth(struct rt_bandwidth *rt_b) | 207 | static void start_rt_bandwidth(struct rt_bandwidth *rt_b) |
@@ -1119,7 +1119,7 @@ static void init_rq_hrtick(struct rq *rq) | |||
1119 | 1119 | ||
1120 | hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | 1120 | hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); |
1121 | rq->hrtick_timer.function = hrtick; | 1121 | rq->hrtick_timer.function = hrtick; |
1122 | rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ; | 1122 | rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU; |
1123 | } | 1123 | } |
1124 | #else | 1124 | #else |
1125 | static inline void hrtick_clear(struct rq *rq) | 1125 | static inline void hrtick_clear(struct rq *rq) |
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 39019b3f7621..cb02324bdb88 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
@@ -625,7 +625,7 @@ void tick_setup_sched_timer(void) | |||
625 | */ | 625 | */ |
626 | hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | 626 | hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); |
627 | ts->sched_timer.function = tick_sched_timer; | 627 | ts->sched_timer.function = tick_sched_timer; |
628 | ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ; | 628 | ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU; |
629 | 629 | ||
630 | /* Get the next period (per cpu) */ | 630 | /* Get the next period (per cpu) */ |
631 | ts->sched_timer.expires = tick_init_jiffy_update(); | 631 | ts->sched_timer.expires = tick_init_jiffy_update(); |
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c index bb948e52ce20..db58fb66a135 100644 --- a/kernel/trace/trace_sysprof.c +++ b/kernel/trace/trace_sysprof.c | |||
@@ -202,7 +202,7 @@ static void start_stack_timer(int cpu) | |||
202 | 202 | ||
203 | hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | 203 | hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); |
204 | hrtimer->function = stack_trace_timer_fn; | 204 | hrtimer->function = stack_trace_timer_fn; |
205 | hrtimer->cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ; | 205 | hrtimer->cb_mode = HRTIMER_CB_IRQSAFE_PERCPU; |
206 | 206 | ||
207 | hrtimer_start(hrtimer, ns_to_ktime(sample_period), HRTIMER_MODE_REL); | 207 | hrtimer_start(hrtimer, ns_to_ktime(sample_period), HRTIMER_MODE_REL); |
208 | } | 208 | } |