-rw-r--r--  include/linux/irq_work.h  |  5
-rw-r--r--  include/linux/tick.h      |  9
-rw-r--r--  kernel/irq_work.c         | 76
-rw-r--r--  kernel/sched/core.c       | 22
-rw-r--r--  kernel/sched/sched.h      | 12
-rw-r--r--  kernel/smp.c              |  9
-rw-r--r--  kernel/time/tick-sched.c  | 10

7 files changed, 97 insertions, 46 deletions
diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h
index 19ae05d4b8ec..bf9422c3aefe 100644
--- a/include/linux/irq_work.h
+++ b/include/linux/irq_work.h
@@ -33,6 +33,11 @@ void init_irq_work(struct irq_work *work, void (*func)(struct irq_work *))
 #define DEFINE_IRQ_WORK(name, _f) struct irq_work name = { .func = (_f), }
 
 bool irq_work_queue(struct irq_work *work);
+
+#ifdef CONFIG_SMP
+bool irq_work_queue_on(struct irq_work *work, int cpu);
+#endif
+
 void irq_work_run(void);
 void irq_work_sync(struct irq_work *work);
 
diff --git a/include/linux/tick.h b/include/linux/tick.h
index b84773cb9f4c..8a4987f2294a 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -181,7 +181,13 @@ static inline bool tick_nohz_full_cpu(int cpu)
 
 extern void tick_nohz_init(void);
 extern void __tick_nohz_full_check(void);
-extern void tick_nohz_full_kick(void);
+extern void tick_nohz_full_kick_cpu(int cpu);
+
+static inline void tick_nohz_full_kick(void)
+{
+	tick_nohz_full_kick_cpu(smp_processor_id());
+}
+
 extern void tick_nohz_full_kick_all(void);
 extern void __tick_nohz_task_switch(struct task_struct *tsk);
 #else
@@ -189,6 +195,7 @@ static inline void tick_nohz_init(void) { }
 static inline bool tick_nohz_full_enabled(void) { return false; }
 static inline bool tick_nohz_full_cpu(int cpu) { return false; }
 static inline void __tick_nohz_full_check(void) { }
+static inline void tick_nohz_full_kick_cpu(int cpu) { }
 static inline void tick_nohz_full_kick(void) { }
 static inline void tick_nohz_full_kick_all(void) { }
 static inline void __tick_nohz_task_switch(struct task_struct *tsk) { }
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index a82170e2fa78..4b0a890a304a 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -16,11 +16,12 @@
 #include <linux/tick.h>
 #include <linux/cpu.h>
 #include <linux/notifier.h>
+#include <linux/smp.h>
 #include <asm/processor.h>
 
 
-static DEFINE_PER_CPU(struct llist_head, irq_work_list);
-static DEFINE_PER_CPU(int, irq_work_raised);
+static DEFINE_PER_CPU(struct llist_head, raised_list);
+static DEFINE_PER_CPU(struct llist_head, lazy_list);
 
 /*
  * Claim the entry so that no one else will poke at it.
@@ -55,12 +56,34 @@ void __weak arch_irq_work_raise(void)
  */
 }
 
+#ifdef CONFIG_SMP
 /*
- * Enqueue the irq_work @entry unless it's already pending
+ * Enqueue the irq_work @work on @cpu unless it's already pending
  * somewhere.
  *
  * Can be re-enqueued while the callback is still in progress.
  */
+bool irq_work_queue_on(struct irq_work *work, int cpu)
+{
+	/* All work should have been flushed before going offline */
+	WARN_ON_ONCE(cpu_is_offline(cpu));
+
+	/* Arch remote IPI send/receive backend aren't NMI safe */
+	WARN_ON_ONCE(in_nmi());
+
+	/* Only queue if not already pending */
+	if (!irq_work_claim(work))
+		return false;
+
+	if (llist_add(&work->llnode, &per_cpu(raised_list, cpu)))
+		arch_send_call_function_single_ipi(cpu);
+
+	return true;
+}
+EXPORT_SYMBOL_GPL(irq_work_queue_on);
+#endif
+
+/* Enqueue the irq work @work on the current CPU */
 bool irq_work_queue(struct irq_work *work)
 {
 	/* Only queue if not already pending */
@@ -70,15 +93,13 @@ bool irq_work_queue(struct irq_work *work)
 	/* Queue the entry and raise the IPI if needed. */
 	preempt_disable();
 
-	llist_add(&work->llnode, &__get_cpu_var(irq_work_list));
-
-	/*
-	 * If the work is not "lazy" or the tick is stopped, raise the irq
-	 * work interrupt (if supported by the arch), otherwise, just wait
-	 * for the next tick.
-	 */
-	if (!(work->flags & IRQ_WORK_LAZY) || tick_nohz_tick_stopped()) {
-		if (!this_cpu_cmpxchg(irq_work_raised, 0, 1))
+	/* If the work is "lazy", handle it from next tick if any */
+	if (work->flags & IRQ_WORK_LAZY) {
+		if (llist_add(&work->llnode, &__get_cpu_var(lazy_list)) &&
+		    tick_nohz_tick_stopped())
+			arch_irq_work_raise();
+	} else {
+		if (llist_add(&work->llnode, &__get_cpu_var(raised_list)))
 			arch_irq_work_raise();
 	}
 
@@ -90,10 +111,11 @@ EXPORT_SYMBOL_GPL(irq_work_queue);
 
 bool irq_work_needs_cpu(void)
 {
-	struct llist_head *this_list;
+	struct llist_head *raised, *lazy;
 
-	this_list = &__get_cpu_var(irq_work_list);
-	if (llist_empty(this_list))
+	raised = &__get_cpu_var(raised_list);
+	lazy = &__get_cpu_var(lazy_list);
+	if (llist_empty(raised) && llist_empty(lazy))
 		return false;
 
 	/* All work should have been flushed before going offline */
@@ -102,28 +124,18 @@ bool irq_work_needs_cpu(void)
 	return true;
 }
 
-static void __irq_work_run(void)
+static void irq_work_run_list(struct llist_head *list)
 {
 	unsigned long flags;
 	struct irq_work *work;
-	struct llist_head *this_list;
 	struct llist_node *llnode;
 
+	BUG_ON(!irqs_disabled());
 
-	/*
-	 * Reset the "raised" state right before we check the list because
-	 * an NMI may enqueue after we find the list empty from the runner.
-	 */
-	__this_cpu_write(irq_work_raised, 0);
-	barrier();
-
-	this_list = &__get_cpu_var(irq_work_list);
-	if (llist_empty(this_list))
+	if (llist_empty(list))
 		return;
 
-	BUG_ON(!irqs_disabled());
-
-	llnode = llist_del_all(this_list);
+	llnode = llist_del_all(list);
 	while (llnode != NULL) {
 		work = llist_entry(llnode, struct irq_work, llnode);
 
@@ -148,6 +160,12 @@ static void __irq_work_run(void)
 	}
 }
 
+static void __irq_work_run(void)
+{
+	irq_work_run_list(&__get_cpu_var(raised_list));
+	irq_work_run_list(&__get_cpu_var(lazy_list));
+}
+
 /*
  * Run the irq_work entries on this cpu. Requires to be ran from hardirq
  * context with local IRQs disabled.
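
A usage sketch for the new interface (illustrative only, not part of the diff above; the callback and helper names are hypothetical, and CONFIG_SMP is assumed): a statically defined irq_work can now be queued on a remote CPU with irq_work_queue_on(), which puts it on that CPU's raised_list and raises the single-function-call IPI.

#include <linux/irq_work.h>
#include <linux/smp.h>
#include <linux/printk.h>

/* Hypothetical callback: runs from irq_work (hardirq) context on the target CPU */
static void remote_report(struct irq_work *work)
{
	pr_info("irq_work ran on CPU %d\n", smp_processor_id());
}

static DEFINE_IRQ_WORK(remote_report_work, remote_report);

/* Hypothetical caller: queue the work on @cpu and let its IPI handler run it */
static void kick_remote_cpu(int cpu)
{
	/* irq_work_queue_on() returns false if the work is already pending */
	if (!irq_work_queue_on(&remote_report_work, cpu))
		pr_debug("remote_report_work already pending\n");
}
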
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3bdf01b494fe..7f3063c153d8 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -684,10 +684,16 @@ static void wake_up_idle_cpu(int cpu)
 
 static bool wake_up_full_nohz_cpu(int cpu)
 {
+	/*
+	 * We just need the target to call irq_exit() and re-evaluate
+	 * the next tick. The nohz full kick at least implies that.
+	 * If needed we can still optimize that later with an
+	 * empty IRQ.
+	 */
 	if (tick_nohz_full_cpu(cpu)) {
 		if (cpu != smp_processor_id() ||
 		    tick_nohz_tick_stopped())
-			smp_send_reschedule(cpu);
+			tick_nohz_full_kick_cpu(cpu);
 		return true;
 	}
 
@@ -734,10 +740,11 @@ bool sched_can_stop_tick(void)
 
 	rq = this_rq();
 
-	/* Make sure rq->nr_running update is visible after the IPI */
-	smp_rmb();
-
-	/* More than one running task need preemption */
+	/*
+	 * More than one running task need preemption.
+	 * nr_running update is assumed to be visible
+	 * after IPI is sent from wakers.
+	 */
 	if (rq->nr_running > 1)
 		return false;
 
@@ -1568,9 +1575,7 @@ void scheduler_ipi(void)
 	 */
 	preempt_fold_need_resched();
 
-	if (llist_empty(&this_rq()->wake_list)
-			&& !tick_nohz_full_cpu(smp_processor_id())
-			&& !got_nohz_idle_kick())
+	if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick())
 		return;
 
 	/*
@@ -1587,7 +1592,6 @@ void scheduler_ipi(void)
 	 * somewhat pessimize the simple resched case.
 	 */
 	irq_enter();
-	tick_nohz_full_check();
 	sched_ttwu_pending();
 
 	/*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 31cc02ebc54e..eb8567610295 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1221,9 +1221,15 @@ static inline void add_nr_running(struct rq *rq, unsigned count)
 #ifdef CONFIG_NO_HZ_FULL
 	if (prev_nr < 2 && rq->nr_running >= 2) {
 		if (tick_nohz_full_cpu(rq->cpu)) {
-			/* Order rq->nr_running write against the IPI */
-			smp_wmb();
-			smp_send_reschedule(rq->cpu);
+			/*
+			 * Tick is needed if more than one task runs on a CPU.
+			 * Send the target an IPI to kick it out of nohz mode.
+			 *
+			 * We assume that IPI implies full memory barrier and the
+			 * new value of rq->nr_running is visible on reception
+			 * from the target.
+			 */
+			tick_nohz_full_kick_cpu(rq->cpu);
 		}
 	}
 #endif
diff --git a/kernel/smp.c b/kernel/smp.c
index 306f8180b0d5..a1812d184aed 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -3,6 +3,7 @@
  *
  * (C) Jens Axboe <jens.axboe@oracle.com> 2008
  */
+#include <linux/irq_work.h>
 #include <linux/rcupdate.h>
 #include <linux/rculist.h>
 #include <linux/kernel.h>
@@ -210,6 +211,14 @@ void generic_smp_call_function_single_interrupt(void)
 		csd->func(csd->info);
 		csd_unlock(csd);
 	}
+
+	/*
+	 * Handle irq works queued remotely by irq_work_queue_on().
+	 * Smp functions above are typically synchronous so they
+	 * better run first since some other CPUs may be busy waiting
+	 * for them.
+	 */
+	irq_work_run();
 }
 
 /*
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 6558b7ac112d..3d63944a3eca 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -224,13 +224,15 @@ static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
 };
 
 /*
- * Kick the current CPU if it's full dynticks in order to force it to
+ * Kick the CPU if it's full dynticks in order to force it to
  * re-evaluate its dependency on the tick and restart it if necessary.
  */
-void tick_nohz_full_kick(void)
+void tick_nohz_full_kick_cpu(int cpu)
 {
-	if (tick_nohz_full_cpu(smp_processor_id()))
-		irq_work_queue(&__get_cpu_var(nohz_full_kick_work));
+	if (!tick_nohz_full_cpu(cpu))
+		return;
+
+	irq_work_queue_on(&per_cpu(nohz_full_kick_work, cpu), cpu);
 }
 
 static void nohz_full_kick_ipi(void *info)
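
A usage sketch for the reworked kick (illustrative only, not part of the diff above; the helper below is hypothetical, and CONFIG_NO_HZ_FULL is assumed): since tick_nohz_full_kick_cpu() bails out early for CPUs that are not running in full dynticks mode, a caller can kick an arbitrary set of CPUs without filtering them first.

#include <linux/tick.h>
#include <linux/cpumask.h>

/*
 * Hypothetical helper: ask every CPU in @mask to re-evaluate its tick.
 * tick_nohz_full_kick_cpu() is a no-op for CPUs that are not full
 * dynticks, so no per-CPU check is needed here.
 */
static void kick_nohz_cpus(const struct cpumask *mask)
{
	int cpu;

	for_each_cpu(cpu, mask)
		tick_nohz_full_kick_cpu(cpu);
}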