 include/linux/irq_work.h |  5
 include/linux/tick.h     |  9
 kernel/irq_work.c        | 76
 kernel/sched/core.c      | 22
 kernel/sched/sched.h     | 12
 kernel/smp.c             |  9
 kernel/time/tick-sched.c | 10
 7 files changed, 97 insertions(+), 46 deletions(-)
diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h
index 19ae05d4b8ec..bf9422c3aefe 100644
--- a/include/linux/irq_work.h
+++ b/include/linux/irq_work.h
@@ -33,6 +33,11 @@ void init_irq_work(struct irq_work *work, void (*func)(struct irq_work *))
 #define DEFINE_IRQ_WORK(name, _f) struct irq_work name = { .func = (_f), }
 
 bool irq_work_queue(struct irq_work *work);
+
+#ifdef CONFIG_SMP
+bool irq_work_queue_on(struct irq_work *work, int cpu);
+#endif
+
 void irq_work_run(void);
 void irq_work_sync(struct irq_work *work);
 
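A minimal usage sketch of the declaration added above (the callback and wrapper names below are hypothetical, not part of this patch): a statically defined irq_work is queued on a chosen remote CPU and its callback later runs there from IRQ context.

#include <linux/irq_work.h>
#include <linux/printk.h>
#include <linux/smp.h>

/* Hypothetical callback: runs on whichever CPU the work was queued on. */
static void example_work_func(struct irq_work *work)
{
	pr_info("irq_work ran on CPU %d\n", smp_processor_id());
}

static DEFINE_IRQ_WORK(example_work, example_work_func);

/* Queue the work on @cpu; returns false if it is already pending somewhere. */
static bool example_kick(int cpu)
{
	return irq_work_queue_on(&example_work, cpu);
}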
diff --git a/include/linux/tick.h b/include/linux/tick.h
index b84773cb9f4c..8a4987f2294a 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -181,7 +181,13 @@ static inline bool tick_nohz_full_cpu(int cpu)
 
 extern void tick_nohz_init(void);
 extern void __tick_nohz_full_check(void);
-extern void tick_nohz_full_kick(void);
+extern void tick_nohz_full_kick_cpu(int cpu);
+
+static inline void tick_nohz_full_kick(void)
+{
+	tick_nohz_full_kick_cpu(smp_processor_id());
+}
+
 extern void tick_nohz_full_kick_all(void);
 extern void __tick_nohz_task_switch(struct task_struct *tsk);
 #else
@@ -189,6 +195,7 @@ static inline void tick_nohz_init(void) { }
 static inline bool tick_nohz_full_enabled(void) { return false; }
 static inline bool tick_nohz_full_cpu(int cpu) { return false; }
 static inline void __tick_nohz_full_check(void) { }
+static inline void tick_nohz_full_kick_cpu(int cpu) { }
 static inline void tick_nohz_full_kick(void) { }
 static inline void tick_nohz_full_kick_all(void) { }
 static inline void __tick_nohz_task_switch(struct task_struct *tsk) { }
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index a82170e2fa78..4b0a890a304a 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -16,11 +16,12 @@
 #include <linux/tick.h>
 #include <linux/cpu.h>
 #include <linux/notifier.h>
+#include <linux/smp.h>
 #include <asm/processor.h>
 
 
-static DEFINE_PER_CPU(struct llist_head, irq_work_list);
-static DEFINE_PER_CPU(int, irq_work_raised);
+static DEFINE_PER_CPU(struct llist_head, raised_list);
+static DEFINE_PER_CPU(struct llist_head, lazy_list);
 
 /*
  * Claim the entry so that no one else will poke at it.
@@ -55,12 +56,34 @@ void __weak arch_irq_work_raise(void)
  */
 }
 
+#ifdef CONFIG_SMP
 /*
- * Enqueue the irq_work @entry unless it's already pending
+ * Enqueue the irq_work @work on @cpu unless it's already pending
  * somewhere.
  *
  * Can be re-enqueued while the callback is still in progress.
  */
+bool irq_work_queue_on(struct irq_work *work, int cpu)
+{
+	/* All work should have been flushed before going offline */
+	WARN_ON_ONCE(cpu_is_offline(cpu));
+
+	/* Arch remote IPI send/receive backend aren't NMI safe */
+	WARN_ON_ONCE(in_nmi());
+
+	/* Only queue if not already pending */
+	if (!irq_work_claim(work))
+		return false;
+
+	if (llist_add(&work->llnode, &per_cpu(raised_list, cpu)))
+		arch_send_call_function_single_ipi(cpu);
+
+	return true;
+}
+EXPORT_SYMBOL_GPL(irq_work_queue_on);
+#endif
+
+/* Enqueue the irq work @work on the current CPU */
 bool irq_work_queue(struct irq_work *work)
 {
 	/* Only queue if not already pending */
@@ -70,15 +93,13 @@ bool irq_work_queue(struct irq_work *work)
 	/* Queue the entry and raise the IPI if needed. */
 	preempt_disable();
 
-	llist_add(&work->llnode, &__get_cpu_var(irq_work_list));
-
-	/*
-	 * If the work is not "lazy" or the tick is stopped, raise the irq
-	 * work interrupt (if supported by the arch), otherwise, just wait
-	 * for the next tick.
-	 */
-	if (!(work->flags & IRQ_WORK_LAZY) || tick_nohz_tick_stopped()) {
-		if (!this_cpu_cmpxchg(irq_work_raised, 0, 1))
+	/* If the work is "lazy", handle it from next tick if any */
+	if (work->flags & IRQ_WORK_LAZY) {
+		if (llist_add(&work->llnode, &__get_cpu_var(lazy_list)) &&
+		    tick_nohz_tick_stopped())
+			arch_irq_work_raise();
+	} else {
+		if (llist_add(&work->llnode, &__get_cpu_var(raised_list)))
 			arch_irq_work_raise();
 	}
 
@@ -90,10 +111,11 @@ EXPORT_SYMBOL_GPL(irq_work_queue);
 
 bool irq_work_needs_cpu(void)
 {
-	struct llist_head *this_list;
+	struct llist_head *raised, *lazy;
 
-	this_list = &__get_cpu_var(irq_work_list);
-	if (llist_empty(this_list))
+	raised = &__get_cpu_var(raised_list);
+	lazy = &__get_cpu_var(lazy_list);
+	if (llist_empty(raised) && llist_empty(lazy))
 		return false;
 
 	/* All work should have been flushed before going offline */
@@ -102,28 +124,18 @@ bool irq_work_needs_cpu(void)
 	return true;
 }
 
-static void __irq_work_run(void)
+static void irq_work_run_list(struct llist_head *list)
 {
 	unsigned long flags;
 	struct irq_work *work;
-	struct llist_head *this_list;
 	struct llist_node *llnode;
 
+	BUG_ON(!irqs_disabled());
 
-	/*
-	 * Reset the "raised" state right before we check the list because
-	 * an NMI may enqueue after we find the list empty from the runner.
-	 */
-	__this_cpu_write(irq_work_raised, 0);
-	barrier();
-
-	this_list = &__get_cpu_var(irq_work_list);
-	if (llist_empty(this_list))
+	if (llist_empty(list))
 		return;
 
-	BUG_ON(!irqs_disabled());
-
-	llnode = llist_del_all(this_list);
+	llnode = llist_del_all(list);
 	while (llnode != NULL) {
 		work = llist_entry(llnode, struct irq_work, llnode);
 
@@ -148,6 +160,12 @@ static void __irq_work_run(void)
 	}
 }
 
+static void __irq_work_run(void)
+{
+	irq_work_run_list(&__get_cpu_var(raised_list));
+	irq_work_run_list(&__get_cpu_var(lazy_list));
+}
+
 /*
  * Run the irq_work entries on this cpu. Requires to be ran from hardirq
  * context with local IRQs disabled.
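With the queue split as above, raised_list is drained from the irq_work IPI (or from the call-function IPI used by irq_work_queue_on()), while lazy_list is normally drained from the tick and only triggers arch_irq_work_raise() when the tick is stopped. For contrast with the earlier example, a sketch of a lazy work item, modelled on how printk uses IRQ_WORK_LAZY in this era; the names are hypothetical.

#include <linux/irq_work.h>
#include <linux/percpu.h>

/* Hypothetical deferred, non-urgent callback. */
static void example_lazy_func(struct irq_work *work)
{
	/* ... processing that can wait for the next tick ... */
}

/* IRQ_WORK_LAZY: lands on lazy_list, no IPI unless the tick is stopped. */
static DEFINE_PER_CPU(struct irq_work, example_lazy_work) = {
	.func	= example_lazy_func,
	.flags	= IRQ_WORK_LAZY,
};

static void example_poke(void)
{
	preempt_disable();
	irq_work_queue(&__get_cpu_var(example_lazy_work));
	preempt_enable();
}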
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3bdf01b494fe..7f3063c153d8 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -684,10 +684,16 @@ static void wake_up_idle_cpu(int cpu)
 
 static bool wake_up_full_nohz_cpu(int cpu)
 {
+	/*
+	 * We just need the target to call irq_exit() and re-evaluate
+	 * the next tick. The nohz full kick at least implies that.
+	 * If needed we can still optimize that later with an
+	 * empty IRQ.
+	 */
 	if (tick_nohz_full_cpu(cpu)) {
 		if (cpu != smp_processor_id() ||
 		    tick_nohz_tick_stopped())
-			smp_send_reschedule(cpu);
+			tick_nohz_full_kick_cpu(cpu);
 		return true;
 	}
 
@@ -734,10 +740,11 @@ bool sched_can_stop_tick(void)
 
 	rq = this_rq();
 
-	/* Make sure rq->nr_running update is visible after the IPI */
-	smp_rmb();
-
-	/* More than one running task need preemption */
+	/*
+	 * More than one running task need preemption.
+	 * nr_running update is assumed to be visible
+	 * after IPI is sent from wakers.
+	 */
 	if (rq->nr_running > 1)
 		return false;
 
@@ -1568,9 +1575,7 @@ void scheduler_ipi(void)
 	 */
 	preempt_fold_need_resched();
 
-	if (llist_empty(&this_rq()->wake_list)
-			&& !tick_nohz_full_cpu(smp_processor_id())
-			&& !got_nohz_idle_kick())
+	if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick())
 		return;
 
 	/*
@@ -1587,7 +1592,6 @@ void scheduler_ipi(void)
 	 * somewhat pessimize the simple resched case.
 	 */
 	irq_enter();
-	tick_nohz_full_check();
 	sched_ttwu_pending();
 
 	/*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 31cc02ebc54e..eb8567610295 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1221,9 +1221,15 @@ static inline void add_nr_running(struct rq *rq, unsigned count)
 #ifdef CONFIG_NO_HZ_FULL
 	if (prev_nr < 2 && rq->nr_running >= 2) {
 		if (tick_nohz_full_cpu(rq->cpu)) {
-			/* Order rq->nr_running write against the IPI */
-			smp_wmb();
-			smp_send_reschedule(rq->cpu);
+			/*
+			 * Tick is needed if more than one task runs on a CPU.
+			 * Send the target an IPI to kick it out of nohz mode.
+			 *
+			 * We assume that IPI implies full memory barrier and the
+			 * new value of rq->nr_running is visible on reception
+			 * from the target.
+			 */
+			tick_nohz_full_kick_cpu(rq->cpu);
 		}
 	}
 #endif
diff --git a/kernel/smp.c b/kernel/smp.c
index 306f8180b0d5..a1812d184aed 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -3,6 +3,7 @@
  *
  * (C) Jens Axboe <jens.axboe@oracle.com> 2008
  */
+#include <linux/irq_work.h>
 #include <linux/rcupdate.h>
 #include <linux/rculist.h>
 #include <linux/kernel.h>
@@ -210,6 +211,14 @@ void generic_smp_call_function_single_interrupt(void)
 		csd->func(csd->info);
 		csd_unlock(csd);
 	}
+
+	/*
+	 * Handle irq works queued remotely by irq_work_queue_on().
+	 * Smp functions above are typically synchronous so they
+	 * better run first since some other CPUs may be busy waiting
+	 * for them.
+	 */
+	irq_work_run();
 }
 
 /*
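The ordering comment above matters because smp_call_function_single() callers frequently pass wait=1 and spin until the target has executed their callback; running irq works first would lengthen that busy wait. A rough sketch of such a synchronous cross-call (hypothetical names, not from the patch):

#include <linux/smp.h>

static void example_sync_fn(void *info)
{
	/* Executes on the target CPU from the call-function IPI. */
	*(int *)info = smp_processor_id();
}

static int example_query_cpu(int cpu)
{
	int answer = -1;

	/*
	 * wait=1: spin until example_sync_fn() has completed on @cpu,
	 * which is why the csd list is drained before irq_work_run().
	 */
	smp_call_function_single(cpu, example_sync_fn, &answer, 1);
	return answer;
}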
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 6558b7ac112d..3d63944a3eca 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -224,13 +224,15 @@ static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
 };
 
 /*
- * Kick the current CPU if it's full dynticks in order to force it to
+ * Kick the CPU if it's full dynticks in order to force it to
  * re-evaluate its dependency on the tick and restart it if necessary.
  */
-void tick_nohz_full_kick(void)
+void tick_nohz_full_kick_cpu(int cpu)
 {
-	if (tick_nohz_full_cpu(smp_processor_id()))
-		irq_work_queue(&__get_cpu_var(nohz_full_kick_work));
+	if (!tick_nohz_full_cpu(cpu))
+		return;
+
+	irq_work_queue_on(&per_cpu(nohz_full_kick_work, cpu), cpu);
 }
 
 static void nohz_full_kick_ipi(void *info)
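Taken together, the series lets any CPU kick a remote full-dynticks CPU so it re-evaluates its tick dependency on the next irq_exit(), as the scheduler hunks above now do. A closing sketch of that pattern from a hypothetical caller (illustrative only):

#include <linux/tick.h>

/* Hypothetical: per-CPU state that the target's tick logic depends on
 * has just been changed by the current CPU. */
static void example_retrigger_tick(int cpu)
{
	/*
	 * Kick @cpu out of nohz mode so it re-evaluates whether the tick
	 * can stay stopped; this is a no-op unless @cpu is nohz_full.
	 */
	tick_nohz_full_kick_cpu(cpu);
}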