author    Frederic Weisbecker <fweisbec@gmail.com>  2013-02-04 18:48:46 -0500
committer Frederic Weisbecker <fweisbec@gmail.com>  2013-02-04 18:48:46 -0500
commit    077931446b85e7858bf9dc0927cd116669b965d2 (patch)
tree      02fbdf4a42b30b841a61aca399bd0ac6a5c308e6
parent    f7c819c020db9796ae3a662b82a310617f92b15b (diff)
parent    74876a98a87a115254b3a66a14b27320b7f0acaa (diff)
Merge branch 'nohz/printk-v8' into irq/core
Conflicts:
	kernel/irq_work.c

Add support for printk in full dynticks CPU:

* Don't stop tick with irq works pending. This fix is generally useful
  and concerns archs that can't raise self IPIs.

* Flush irq works before CPU offlining.

* Introduce "lazy" irq works that can wait for the next tick to be
  executed, unless it's stopped.

* Implement klogd wake up using irq work. This removes the ad-hoc
  printk_tick()/printk_needs_cpu() hooks and makes printk work even in
  dynticks mode.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
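The API this merge settles on is small: a work item carries a callback plus flags, and a "lazy" item (IRQ_WORK_LAZY) queued via irq_work_queue() simply waits for the next tick, unless the tick is stopped, in which case the self-IPI is raised after all. A minimal usage sketch, hypothetical and not part of this merge (it mirrors the printk.c change below; my_lazy_func, my_lazy_work and my_nmi_safe_path are invented names):

#include <linux/irq_work.h>
#include <linux/percpu.h>
#include <linux/printk.h>

static void my_lazy_func(struct irq_work *work)
{
	/* Runs in hardirq context: from the irq_work self-interrupt,
	 * or, since the item is lazy, from the next timer tick. */
	pr_info("deferred work ran\n");
}

/* IRQ_WORK_LAZY: don't force a self-IPI, the next tick is soon enough */
static DEFINE_PER_CPU(struct irq_work, my_lazy_work) = {
	.func	= my_lazy_func,
	.flags	= IRQ_WORK_LAZY,
};

/* Safe from NMI/hardirq: queueing is a lock-less llist_add() plus,
 * at most, one coalesced arch_irq_work_raise() per CPU. */
static void my_nmi_safe_path(void)
{
	irq_work_queue(&__get_cpu_var(my_lazy_work));
}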
-rw-r--r--  include/linux/irq_work.h  |  20
-rw-r--r--  include/linux/printk.h    |   3
-rw-r--r--  include/linux/tick.h      |  17
-rw-r--r--  init/Kconfig              |   1
-rw-r--r--  kernel/irq_work.c         | 112
-rw-r--r--  kernel/printk.c           |  36
-rw-r--r--  kernel/time/tick-sched.c  |   7
-rw-r--r--  kernel/timer.c            |   1
8 files changed, 147 insertions, 50 deletions
diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h
index ce60c084635b..f5dbce50466e 100644
--- a/include/linux/irq_work.h
+++ b/include/linux/irq_work.h
@@ -3,6 +3,20 @@
 
 #include <linux/llist.h>
 
+/*
+ * An entry can be in one of four states:
+ *
+ * free	     NULL, 0 -> {claimed}       : free to be used
+ * claimed   NULL, 3 -> {pending}       : claimed to be enqueued
+ * pending   next, 3 -> {busy}          : queued, pending callback
+ * busy      NULL, 2 -> {free, claimed} : callback in progress, can be claimed
+ */
+
+#define IRQ_WORK_PENDING	1UL
+#define IRQ_WORK_BUSY		2UL
+#define IRQ_WORK_FLAGS		3UL
+#define IRQ_WORK_LAZY		4UL /* Doesn't want IPI, wait for tick */
+
 struct irq_work {
 	unsigned long flags;
 	struct llist_node llnode;
@@ -20,4 +34,10 @@ void irq_work_queue(struct irq_work *work);
 void irq_work_run(void);
 void irq_work_sync(struct irq_work *work);
 
+#ifdef CONFIG_IRQ_WORK
+bool irq_work_needs_cpu(void);
+#else
+static inline bool irq_work_needs_cpu(void) { return false; }
+#endif
+
 #endif /* _LINUX_IRQ_WORK_H */
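The state table moved into this header packs each state into the two low flag bits (IRQ_WORK_FLAGS is PENDING|BUSY, the "3" in the table), leaving higher bits such as IRQ_WORK_LAZY untouched. Below is a sketch of the claim transition that the table describes, assuming only the flags defined above; the in-tree irq_work_claim() in kernel/irq_work.c is close to this but may differ in detail:

/* Sketch: atomically move free/busy -> claimed (both low bits set),
 * preserving high bits such as IRQ_WORK_LAZY.  Returns false if the
 * work is already pending, i.e. someone else claimed it first. */
static bool irq_work_claim_sketch(struct irq_work *work)
{
	unsigned long flags, oflags, nflags;

	flags = work->flags & ~IRQ_WORK_PENDING;
	for (;;) {
		nflags = flags | IRQ_WORK_FLAGS;
		oflags = cmpxchg(&work->flags, flags, nflags);
		if (oflags == flags)
			return true;	/* we own the entry now */
		if (oflags & IRQ_WORK_PENDING)
			return false;	/* already queued by someone */
		flags = oflags;		/* lost a race; retry with it */
	}
}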
diff --git a/include/linux/printk.h b/include/linux/printk.h
index 9afc01e5a0a6..86c4b6294713 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -98,9 +98,6 @@ int no_printk(const char *fmt, ...)
 extern asmlinkage __printf(1, 2)
 void early_printk(const char *fmt, ...);
 
-extern int printk_needs_cpu(int cpu);
-extern void printk_tick(void);
-
 #ifdef CONFIG_PRINTK
 asmlinkage __printf(5, 0)
 int vprintk_emit(int facility, int level,
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 1a6567b48492..553272e6af55 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -8,6 +8,8 @@
 
 #include <linux/clockchips.h>
 #include <linux/irqflags.h>
+#include <linux/percpu.h>
+#include <linux/hrtimer.h>
 
 #ifdef CONFIG_GENERIC_CLOCKEVENTS
 
@@ -122,13 +124,26 @@ static inline int tick_oneshot_mode_active(void) { return 0; }
 #endif /* !CONFIG_GENERIC_CLOCKEVENTS */
 
 # ifdef CONFIG_NO_HZ
+DECLARE_PER_CPU(struct tick_sched, tick_cpu_sched);
+
+static inline int tick_nohz_tick_stopped(void)
+{
+	return __this_cpu_read(tick_cpu_sched.tick_stopped);
+}
+
 extern void tick_nohz_idle_enter(void);
 extern void tick_nohz_idle_exit(void);
 extern void tick_nohz_irq_exit(void);
 extern ktime_t tick_nohz_get_sleep_length(void);
 extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
 extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time);
-# else
+
+# else /* !CONFIG_NO_HZ */
+static inline int tick_nohz_tick_stopped(void)
+{
+	return 0;
+}
+
 static inline void tick_nohz_idle_enter(void) { }
 static inline void tick_nohz_idle_exit(void) { }
 
diff --git a/init/Kconfig b/init/Kconfig
index e3227d7ba35d..a98e1acc122d 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1259,6 +1259,7 @@ config HOTPLUG
 config PRINTK
 	default y
 	bool "Enable support for printk" if EXPERT
+	select IRQ_WORK
 	help
 	  This option enables normal printk support. Removing it
 	  eliminates most of the message strings from the kernel image
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index c9d7478e4889..55fcce6065cf 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -12,22 +12,15 @@
 #include <linux/percpu.h>
 #include <linux/hardirq.h>
 #include <linux/irqflags.h>
+#include <linux/sched.h>
+#include <linux/tick.h>
+#include <linux/cpu.h>
+#include <linux/notifier.h>
 #include <asm/processor.h>
 
-/*
- * An entry can be in one of four states:
- *
- * free	     NULL, 0 -> {claimed}       : free to be used
- * claimed   NULL, 3 -> {pending}       : claimed to be enqueued
- * pending   next, 3 -> {busy}          : queued, pending callback
- * busy      NULL, 2 -> {free, claimed} : callback in progress, can be claimed
- */
-
-#define IRQ_WORK_PENDING	1UL
-#define IRQ_WORK_BUSY		2UL
-#define IRQ_WORK_FLAGS		3UL
 
 static DEFINE_PER_CPU(struct llist_head, irq_work_list);
+static DEFINE_PER_CPU(int, irq_work_raised);
 
 /*
  * Claim the entry so that no one else will poke at it.
@@ -70,8 +63,6 @@ void __weak arch_irq_work_raise(void)
  */
 void irq_work_queue(struct irq_work *work)
 {
-	bool empty;
-
 	/* Only queue if not already pending */
 	if (!irq_work_claim(work))
 		return;
@@ -79,30 +70,55 @@ void irq_work_queue(struct irq_work *work)
 	/* Queue the entry and raise the IPI if needed. */
 	preempt_disable();
 
-	empty = llist_add(&work->llnode, &__get_cpu_var(irq_work_list));
-	/* The list was empty, raise self-interrupt to start processing. */
-	if (empty)
-		arch_irq_work_raise();
+	llist_add(&work->llnode, &__get_cpu_var(irq_work_list));
+
+	/*
+	 * If the work is not "lazy" or the tick is stopped, raise the irq
+	 * work interrupt (if supported by the arch), otherwise, just wait
+	 * for the next tick.
+	 */
+	if (!(work->flags & IRQ_WORK_LAZY) || tick_nohz_tick_stopped()) {
+		if (!this_cpu_cmpxchg(irq_work_raised, 0, 1))
+			arch_irq_work_raise();
+	}
 
 	preempt_enable();
 }
 EXPORT_SYMBOL_GPL(irq_work_queue);
 
-/*
- * Run the irq_work entries on this cpu. Requires to be ran from hardirq
- * context with local IRQs disabled.
- */
-void irq_work_run(void)
+bool irq_work_needs_cpu(void)
+{
+	struct llist_head *this_list;
+
+	this_list = &__get_cpu_var(irq_work_list);
+	if (llist_empty(this_list))
+		return false;
+
+	/* All work should have been flushed before going offline */
+	WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
+
+	return true;
+}
+
+static void __irq_work_run(void)
 {
+	unsigned long flags;
 	struct irq_work *work;
 	struct llist_head *this_list;
 	struct llist_node *llnode;
 
+
+	/*
+	 * Reset the "raised" state right before we check the list because
+	 * an NMI may enqueue after we find the list empty from the runner.
+	 */
+	__this_cpu_write(irq_work_raised, 0);
+	barrier();
+
 	this_list = &__get_cpu_var(irq_work_list);
 	if (llist_empty(this_list))
 		return;
 
-	BUG_ON(!in_irq());
 	BUG_ON(!irqs_disabled());
 
 	llnode = llist_del_all(this_list);
@@ -118,15 +134,27 @@ void irq_work_run(void)
 	 * to claim that work don't rely on us to handle their data
 	 * while we are in the middle of the func.
 	 */
-	xchg(&work->flags, IRQ_WORK_BUSY);
+	flags = work->flags & ~IRQ_WORK_PENDING;
+	xchg(&work->flags, flags);
+
 	work->func(work);
 	/*
 	 * Clear the BUSY bit and return to the free state if
 	 * no-one else claimed it meanwhile.
 	 */
-	(void)cmpxchg(&work->flags, IRQ_WORK_BUSY, 0);
+	(void)cmpxchg(&work->flags, flags, flags & ~IRQ_WORK_BUSY);
 	}
 }
+
+/*
+ * Run the irq_work entries on this cpu. Must be run from hardirq
+ * context with local IRQs disabled.
+ */
+void irq_work_run(void)
+{
+	BUG_ON(!in_irq());
+	__irq_work_run();
+}
 EXPORT_SYMBOL_GPL(irq_work_run);
 
 /*
@@ -141,3 +169,35 @@ void irq_work_sync(struct irq_work *work)
 		cpu_relax();
 }
 EXPORT_SYMBOL_GPL(irq_work_sync);
+
+#ifdef CONFIG_HOTPLUG_CPU
+static int irq_work_cpu_notify(struct notifier_block *self,
+			       unsigned long action, void *hcpu)
+{
+	long cpu = (long)hcpu;
+
+	switch (action) {
+	case CPU_DYING:
+		/* Called from stop_machine */
+		if (WARN_ON_ONCE(cpu != smp_processor_id()))
+			break;
+		__irq_work_run();
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block cpu_notify;
+
+static __init int irq_work_init_cpu_notifier(void)
+{
+	cpu_notify.notifier_call = irq_work_cpu_notify;
+	cpu_notify.priority = 0;
+	register_cpu_notifier(&cpu_notify);
+	return 0;
+}
+device_initcall(irq_work_init_cpu_notifier);
+
+#endif /* CONFIG_HOTPLUG_CPU */
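arch_irq_work_raise() remains a weak empty stub in this file; architectures that can send a self-IPI override it so that queued (non-lazy) work runs without waiting for a tick. As a point of reference, the x86 override of that era looked roughly like the sketch below (an approximation of arch/x86/kernel/irq_work.c, not part of this merge):

void arch_irq_work_raise(void)
{
#ifdef CONFIG_X86_LOCAL_APIC
	if (!cpu_has_apic)
		return;

	/* Send ourselves the dedicated IRQ_WORK_VECTOR self-IPI */
	apic->send_IPI_self(IRQ_WORK_VECTOR);
	apic_wait_icr_idle();
#endif
}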
diff --git a/kernel/printk.c b/kernel/printk.c
index 357f714ddd49..0b31715f335a 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -42,6 +42,7 @@
 #include <linux/notifier.h>
 #include <linux/rculist.h>
 #include <linux/poll.h>
+#include <linux/irq_work.h>
 
 #include <asm/uaccess.h>
 
@@ -1967,30 +1968,32 @@ int is_console_locked(void)
 static DEFINE_PER_CPU(int, printk_pending);
 static DEFINE_PER_CPU(char [PRINTK_BUF_SIZE], printk_sched_buf);
 
-void printk_tick(void)
+static void wake_up_klogd_work_func(struct irq_work *irq_work)
 {
-	if (__this_cpu_read(printk_pending)) {
-		int pending = __this_cpu_xchg(printk_pending, 0);
-		if (pending & PRINTK_PENDING_SCHED) {
-			char *buf = __get_cpu_var(printk_sched_buf);
-			printk(KERN_WARNING "[sched_delayed] %s", buf);
-		}
-		if (pending & PRINTK_PENDING_WAKEUP)
-			wake_up_interruptible(&log_wait);
+	int pending = __this_cpu_xchg(printk_pending, 0);
+
+	if (pending & PRINTK_PENDING_SCHED) {
+		char *buf = __get_cpu_var(printk_sched_buf);
+		printk(KERN_WARNING "[sched_delayed] %s", buf);
 	}
-}
 
-int printk_needs_cpu(int cpu)
-{
-	if (cpu_is_offline(cpu))
-		printk_tick();
-	return __this_cpu_read(printk_pending);
+	if (pending & PRINTK_PENDING_WAKEUP)
+		wake_up_interruptible(&log_wait);
 }
 
+static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = {
+	.func = wake_up_klogd_work_func,
+	.flags = IRQ_WORK_LAZY,
+};
+
 void wake_up_klogd(void)
 {
-	if (waitqueue_active(&log_wait))
+	preempt_disable();
+	if (waitqueue_active(&log_wait)) {
 		this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP);
+		irq_work_queue(&__get_cpu_var(wake_up_klogd_work));
+	}
+	preempt_enable();
 }
 
 static void console_cont_flush(char *text, size_t size)
@@ -2471,6 +2474,7 @@ int printk_sched(const char *fmt, ...)
 	va_end(args);
 
 	__this_cpu_or(printk_pending, PRINTK_PENDING_SCHED);
+	irq_work_queue(&__get_cpu_var(wake_up_klogd_work));
 	local_irq_restore(flags);
 
 	return r;
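The result is that printk_sched() messages, stashed in the per-CPU printk_sched_buf, are now flushed by the lazy irq work from a later tick instead of the removed printk_tick() hook. A hypothetical caller, for illustration only (report_imbalance_sketch is an invented name; printk_sched() itself predates this merge):

#include <linux/printk.h>
#include <linux/smp.h>

/* Hypothetical caller running under the runqueue lock, where a direct
 * printk() could recurse into the scheduler via the console wakeup. */
static void report_imbalance_sketch(void)
{
	printk_sched("cpu %d: runqueue imbalance detected\n",
		     smp_processor_id());
}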
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index d58e552d9fd1..fb8e5e469d1c 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -20,6 +20,7 @@
 #include <linux/profile.h>
 #include <linux/sched.h>
 #include <linux/module.h>
+#include <linux/irq_work.h>
 
 #include <asm/irq_regs.h>
 
@@ -28,7 +29,7 @@
 /*
  * Per cpu nohz control structure
  */
-static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);
+DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);
 
 /*
  * The time, when the last jiffy update happened. Protected by jiffies_lock.
@@ -331,8 +332,8 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 		time_delta = timekeeping_max_deferment();
 	} while (read_seqretry(&jiffies_lock, seq));
 
-	if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || printk_needs_cpu(cpu) ||
-	    arch_needs_cpu(cpu)) {
+	if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) ||
+	    arch_needs_cpu(cpu) || irq_work_needs_cpu()) {
 		next_jiffies = last_jiffies + 1;
 		delta_jiffies = 1;
 	} else {
diff --git a/kernel/timer.c b/kernel/timer.c
index 367d00858482..ff3b5165737b 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1351,7 +1351,6 @@ void update_process_times(int user_tick)
 	account_process_tick(p, user_tick);
 	run_local_timers();
 	rcu_check_callbacks(cpu, user_tick);
-	printk_tick();
 #ifdef CONFIG_IRQ_WORK
 	if (in_irq())
 		irq_work_run();